All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.streaming.api.datastream.ConnectedStreams Maven / Gradle / Ivy

There is a newer version: 2.0-preview1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.datastream;

import org.apache.flink.annotation.Internal;
import org.apache.flink.annotation.Public;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.Utils;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoFlatMapFunction;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;
import org.apache.flink.streaming.api.functions.co.CoProcessFunction;
import org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction;
import org.apache.flink.streaming.api.operators.SimpleOperatorFactory;
import org.apache.flink.streaming.api.operators.StreamOperatorFactory;
import org.apache.flink.streaming.api.operators.TwoInputStreamOperator;
import org.apache.flink.streaming.api.operators.TwoInputStreamOperatorFactory;
import org.apache.flink.streaming.api.operators.co.CoProcessOperator;
import org.apache.flink.streaming.api.operators.co.CoStreamFlatMap;
import org.apache.flink.streaming.api.operators.co.CoStreamMap;
import org.apache.flink.streaming.api.operators.co.KeyedCoProcessOperator;
import org.apache.flink.streaming.api.operators.co.LegacyKeyedCoProcessOperator;
import org.apache.flink.streaming.api.transformations.TwoInputTransformation;

import static java.util.Objects.requireNonNull;

/**
 * ConnectedStreams represent two connected streams of (possibly) different data types. Connected
 * streams are useful for cases where operations on one stream directly affect the operations on the
 * other stream, usually via shared state between the streams.
 *
 * 

An example for the use of connected streams would be to apply rules that change over time onto * another stream. One of the connected streams has the rules, the other stream the elements to * apply the rules to. The operation on the connected stream maintains the current set of rules in * the state. It may receive either a rule update and update the state or a data element and apply * the rules in the state to the element. * *

The connected stream can be conceptually viewed as a union stream of an Either type, that * holds either the first stream's type or the second stream's type. * * @param Type of the first input data steam. * @param Type of the second input data stream. */ @Public public class ConnectedStreams { protected final StreamExecutionEnvironment environment; protected final DataStream inputStream1; protected final DataStream inputStream2; protected ConnectedStreams( StreamExecutionEnvironment env, DataStream input1, DataStream input2) { this.environment = requireNonNull(env); this.inputStream1 = requireNonNull(input1); this.inputStream2 = requireNonNull(input2); } public StreamExecutionEnvironment getExecutionEnvironment() { return environment; } /** * Returns the first {@link DataStream}. * * @return The first DataStream. */ public DataStream getFirstInput() { return inputStream1; } /** * Returns the second {@link DataStream}. * * @return The second DataStream. */ public DataStream getSecondInput() { return inputStream2; } /** * Gets the type of the first input. * * @return The type of the first input */ public TypeInformation getType1() { return inputStream1.getType(); } /** * Gets the type of the second input. * * @return The type of the second input */ public TypeInformation getType2() { return inputStream2.getType(); } /** * KeyBy operation for connected data stream. Assigns keys to the elements of input1 and input2 * according to keyPosition1 and keyPosition2. * * @param keyPosition1 The field used to compute the hashcode of the elements in the first input * stream. * @param keyPosition2 The field used to compute the hashcode of the elements in the second * input stream. * @return The grouped {@link ConnectedStreams} */ public ConnectedStreams keyBy(int keyPosition1, int keyPosition2) { return new ConnectedStreams<>( this.environment, inputStream1.keyBy(keyPosition1), inputStream2.keyBy(keyPosition2)); } /** * KeyBy operation for connected data stream. Assigns keys to the elements of input1 and input2 * according to keyPositions1 and keyPositions2. * * @param keyPositions1 The fields used to group the first input stream. * @param keyPositions2 The fields used to group the second input stream. * @return The grouped {@link ConnectedStreams} */ public ConnectedStreams keyBy(int[] keyPositions1, int[] keyPositions2) { return new ConnectedStreams<>( environment, inputStream1.keyBy(keyPositions1), inputStream2.keyBy(keyPositions2)); } /** * KeyBy operation for connected data stream using key expressions. Assigns keys to the elements * of input1 and input2 according to field1 and field2. A field expression is either the name of * a public field or a getter method with parentheses of the {@link DataStream}S underlying * type. A dot can be used to drill down into objects, as in {@code "field1.getInnerField2()" }. * * @param field1 The grouping expression for the first input * @param field2 The grouping expression for the second input * @return The grouped {@link ConnectedStreams} */ public ConnectedStreams keyBy(String field1, String field2) { return new ConnectedStreams<>( environment, inputStream1.keyBy(field1), inputStream2.keyBy(field2)); } /** * KeyBy operation for connected data stream using key expressions. the elements of input1 and * input2 according to fields1 and fields2. A field expression is either the name of a public * field or a getter method with parentheses of the {@link DataStream}S underlying type. A dot * can be used to drill down into objects, as in {@code "field1.getInnerField2()" } . * * @param fields1 The grouping expressions for the first input * @param fields2 The grouping expressions for the second input * @return The grouped {@link ConnectedStreams} */ public ConnectedStreams keyBy(String[] fields1, String[] fields2) { return new ConnectedStreams<>( environment, inputStream1.keyBy(fields1), inputStream2.keyBy(fields2)); } /** * KeyBy operation for connected data stream. Assigns keys to the elements of input1 and input2 * using keySelector1 and keySelector2. * * @param keySelector1 The {@link KeySelector} used for grouping the first input * @param keySelector2 The {@link KeySelector} used for grouping the second input * @return The partitioned {@link ConnectedStreams} */ public ConnectedStreams keyBy( KeySelector keySelector1, KeySelector keySelector2) { return new ConnectedStreams<>( environment, inputStream1.keyBy(keySelector1), inputStream2.keyBy(keySelector2)); } /** * KeyBy operation for connected data stream. Assigns keys to the elements of input1 and input2 * using keySelector1 and keySelector2 with explicit type information for the common key type. * * @param keySelector1 The {@link KeySelector} used for grouping the first input * @param keySelector2 The {@link KeySelector} used for grouping the second input * @param keyType The type information of the common key type. * @return The partitioned {@link ConnectedStreams} */ public ConnectedStreams keyBy( KeySelector keySelector1, KeySelector keySelector2, TypeInformation keyType) { return new ConnectedStreams<>( environment, inputStream1.keyBy(keySelector1, keyType), inputStream2.keyBy(keySelector2, keyType)); } /** * Applies a CoMap transformation on a {@link ConnectedStreams} and maps the output to a common * type. The transformation calls a {@link CoMapFunction#map1} for each element of the first * input and {@link CoMapFunction#map2} for each element of the second input. Each CoMapFunction * call returns exactly one element. * * @param coMapper The CoMapFunction used to jointly transform the two input DataStreams * @return The transformed {@link DataStream} */ public SingleOutputStreamOperator map(CoMapFunction coMapper) { TypeInformation outTypeInfo = TypeExtractor.getBinaryOperatorReturnType( coMapper, CoMapFunction.class, 0, 1, 2, TypeExtractor.NO_INDEX, getType1(), getType2(), Utils.getCallLocationName(), true); return map(coMapper, outTypeInfo); } /** * Applies a CoMap transformation on a {@link ConnectedStreams} and maps the output to a common * type. The transformation calls a {@link CoMapFunction#map1} for each element of the first * input and {@link CoMapFunction#map2} for each element of the second input. Each CoMapFunction * call returns exactly one element. * * @param coMapper The CoMapFunction used to jointly transform the two input DataStreams * @param outputType {@link TypeInformation} for the result type of the function. * @return The transformed {@link DataStream} */ public SingleOutputStreamOperator map( CoMapFunction coMapper, TypeInformation outputType) { return transform("Co-Map", outputType, new CoStreamMap<>(inputStream1.clean(coMapper))); } /** * Applies a CoFlatMap transformation on a {@link ConnectedStreams} and maps the output to a * common type. The transformation calls a {@link CoFlatMapFunction#flatMap1} for each element * of the first input and {@link CoFlatMapFunction#flatMap2} for each element of the second * input. Each CoFlatMapFunction call returns any number of elements including none. * * @param coFlatMapper The CoFlatMapFunction used to jointly transform the two input DataStreams * @return The transformed {@link DataStream} */ public SingleOutputStreamOperator flatMap(CoFlatMapFunction coFlatMapper) { TypeInformation outTypeInfo = TypeExtractor.getBinaryOperatorReturnType( coFlatMapper, CoFlatMapFunction.class, 0, 1, 2, TypeExtractor.NO_INDEX, getType1(), getType2(), Utils.getCallLocationName(), true); return flatMap(coFlatMapper, outTypeInfo); } /** * Applies a CoFlatMap transformation on a {@link ConnectedStreams} and maps the output to a * common type. The transformation calls a {@link CoFlatMapFunction#flatMap1} for each element * of the first input and {@link CoFlatMapFunction#flatMap2} for each element of the second * input. Each CoFlatMapFunction call returns any number of elements including none. * * @param coFlatMapper The CoFlatMapFunction used to jointly transform the two input DataStreams * @param outputType {@link TypeInformation} for the result type of the function. * @return The transformed {@link DataStream} */ public SingleOutputStreamOperator flatMap( CoFlatMapFunction coFlatMapper, TypeInformation outputType) { return transform( "Co-Flat Map", outputType, new CoStreamFlatMap<>(inputStream1.clean(coFlatMapper))); } /** * Applies the given {@link CoProcessFunction} on the connected input streams, thereby creating * a transformed output stream. * *

The function will be called for every element in the input streams and can produce zero or * more output elements. Contrary to the {@link #flatMap(CoFlatMapFunction)} function, this * function can also query the time and set timers. When reacting to the firing of set timers * the function can directly emit elements and/or register yet more timers. * * @param coProcessFunction The {@link CoProcessFunction} that is called for each element in the * stream. * @param The type of elements emitted by the {@code CoProcessFunction}. * @return The transformed {@link DataStream}. */ @PublicEvolving public SingleOutputStreamOperator process( CoProcessFunction coProcessFunction) { TypeInformation outTypeInfo = TypeExtractor.getBinaryOperatorReturnType( coProcessFunction, CoProcessFunction.class, 0, 1, 2, TypeExtractor.NO_INDEX, getType1(), getType2(), Utils.getCallLocationName(), true); return process(coProcessFunction, outTypeInfo); } /** * Applies the given {@link CoProcessFunction} on the connected input streams, thereby creating * a transformed output stream. * *

The function will be called for every element in the input streams and can produce zero or * more output elements. Contrary to the {@link #flatMap(CoFlatMapFunction)} function, this * function can also query the time and set timers. When reacting to the firing of set timers * the function can directly emit elements and/or register yet more timers. * * @param coProcessFunction The {@link CoProcessFunction} that is called for each element in the * stream. * @param The type of elements emitted by the {@code CoProcessFunction}. * @return The transformed {@link DataStream}. */ @Internal public SingleOutputStreamOperator process( CoProcessFunction coProcessFunction, TypeInformation outputType) { TwoInputStreamOperator operator; if ((inputStream1 instanceof KeyedStream) && (inputStream2 instanceof KeyedStream)) { operator = new LegacyKeyedCoProcessOperator<>(inputStream1.clean(coProcessFunction)); } else { operator = new CoProcessOperator<>(inputStream1.clean(coProcessFunction)); } return transform("Co-Process", outputType, operator); } /** * Applies the given {@link KeyedCoProcessFunction} on the connected input keyed streams, * thereby creating a transformed output stream. * *

The function will be called for every element in the input keyed streams and can produce * zero or more output elements. Contrary to the {@link #flatMap(CoFlatMapFunction)} function, * this function can also query the time and set timers. When reacting to the firing of set * timers the function can directly emit elements and/or register yet more timers. * * @param keyedCoProcessFunction The {@link KeyedCoProcessFunction} that is called for each * element in the stream. * @param The type of elements emitted by the {@code CoProcessFunction}. * @return The transformed {@link DataStream}. */ @PublicEvolving public SingleOutputStreamOperator process( KeyedCoProcessFunction keyedCoProcessFunction) { TypeInformation outTypeInfo = TypeExtractor.getBinaryOperatorReturnType( keyedCoProcessFunction, KeyedCoProcessFunction.class, 1, 2, 3, TypeExtractor.NO_INDEX, getType1(), getType2(), Utils.getCallLocationName(), true); return process(keyedCoProcessFunction, outTypeInfo); } /** * Applies the given {@link KeyedCoProcessFunction} on the connected input streams, thereby * creating a transformed output stream. * *

The function will be called for every element in the input streams and can produce zero or * more output elements. Contrary to the {@link #flatMap(CoFlatMapFunction)} function, this * function can also query the time and set timers. When reacting to the firing of set timers * the function can directly emit elements and/or register yet more timers. * * @param keyedCoProcessFunction The {@link KeyedCoProcessFunction} that is called for each * element in the stream. * @param The type of elements emitted by the {@code CoProcessFunction}. * @return The transformed {@link DataStream}. */ @Internal public SingleOutputStreamOperator process( KeyedCoProcessFunction keyedCoProcessFunction, TypeInformation outputType) { TwoInputStreamOperator operator; if ((inputStream1 instanceof KeyedStream) && (inputStream2 instanceof KeyedStream)) { operator = new KeyedCoProcessOperator<>(inputStream1.clean(keyedCoProcessFunction)); } else { throw new UnsupportedOperationException( "KeyedCoProcessFunction can only be used " + "when both input streams are of type KeyedStream."); } return transform("Co-Keyed-Process", outputType, operator); } @PublicEvolving public SingleOutputStreamOperator transform( String functionName, TypeInformation outTypeInfo, TwoInputStreamOperator operator) { return doTransform(functionName, outTypeInfo, SimpleOperatorFactory.of(operator)); } @PublicEvolving public SingleOutputStreamOperator transform( String functionName, TypeInformation outTypeInfo, TwoInputStreamOperatorFactory operatorFactory) { return doTransform(functionName, outTypeInfo, operatorFactory); } private SingleOutputStreamOperator doTransform( String functionName, TypeInformation outTypeInfo, StreamOperatorFactory operatorFactory) { // read the output type of the input Transforms to coax out errors about MissingTypeInfo inputStream1.getType(); inputStream2.getType(); TwoInputTransformation transform = new TwoInputTransformation<>( inputStream1.getTransformation(), inputStream2.getTransformation(), functionName, operatorFactory, outTypeInfo, environment.getParallelism(), false); TypeInformation keyType = null; if (inputStream1 instanceof KeyedStream) { KeyedStream keyedInput1 = (KeyedStream) inputStream1; keyType = keyedInput1.getKeyType(); transform.setStateKeySelectors(keyedInput1.getKeySelector(), null); transform.setStateKeyType(keyType); } if (inputStream2 instanceof KeyedStream) { KeyedStream keyedInput2 = (KeyedStream) inputStream2; TypeInformation keyType2 = keyedInput2.getKeyType(); if (keyType != null && !(keyType.canEqual(keyType2) && keyType.equals(keyType2))) { throw new UnsupportedOperationException( "Key types if input KeyedStreams " + "don't match: " + keyType + " and " + keyType2 + "."); } transform.setStateKeySelectors( transform.getStateKeySelector1(), keyedInput2.getKeySelector()); // we might be overwriting the one that's already set, but it's the same transform.setStateKeyType(keyType2); } @SuppressWarnings({"unchecked", "rawtypes"}) SingleOutputStreamOperator returnStream = new SingleOutputStreamOperator(environment, transform); getExecutionEnvironment().addOperator(transform); return returnStream; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy