All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.flink.streaming.api.datastream.ConnectedStreams Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.datastream;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.Utils;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoFlatMapFunction;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;
import org.apache.flink.streaming.api.operators.TwoInputStreamOperator;
import org.apache.flink.streaming.api.operators.co.CoStreamFlatMap;
import org.apache.flink.streaming.api.operators.co.CoStreamMap;
import org.apache.flink.streaming.api.transformations.TwoInputTransformation;

/**
 * {@code ConnectedStreams} represents two connected streams of (possibly) different data types. It
 * can be used to apply transformations such as {@link CoMapFunction} on two
 * {@link DataStream DataStreams}.
 *
 * <p>Each input keeps its own element type. The co-functions applied through
 * {@link #map(CoMapFunction)} and {@link #flatMap(CoFlatMapFunction)} produce a single output
 * stream with a common element type.
 *
 * @param <IN1> Type of the first input data stream.
 * @param <IN2> Type of the second input data stream.
 */
public class ConnectedStreams<IN1, IN2> {

	/** Execution environment the resulting transformations are registered with. */
	protected StreamExecutionEnvironment environment;
	/** First of the two connected inputs. */
	protected DataStream<IN1> inputStream1;
	/** Second of the two connected inputs. */
	protected DataStream<IN2> inputStream2;

	/**
	 * Creates a connection between the two given streams.
	 *
	 * <p>No validation is performed: a {@code null} input is stored as {@code null}, and
	 * methods that dereference that input will fail when they are called.
	 *
	 * @param env
	 *            The execution environment used by the derived transformations.
	 * @param input1
	 *            The first input stream.
	 * @param input2
	 *            The second input stream.
	 */
	protected ConnectedStreams(StreamExecutionEnvironment env,
			DataStream<IN1> input1,
			DataStream<IN2> input2) {
		this.environment = env;
		// The fields default to null, so assigning unconditionally is equivalent
		// to the former "assign only when non-null" guards.
		this.inputStream1 = input1;
		this.inputStream2 = input2;
	}

	/**
	 * Returns the execution environment this pair of streams was created with.
	 *
	 * @return The {@link StreamExecutionEnvironment} passed to the constructor.
	 */
	public StreamExecutionEnvironment getExecutionEnvironment() {
		return environment;
	}

	/**
	 * Returns the first {@link DataStream}.
	 *
	 * @return The first DataStream.
	 */
	public DataStream<IN1> getFirstInput() {
		return inputStream1;
	}

	/**
	 * Returns the second {@link DataStream}.
	 *
	 * @return The second DataStream.
	 */
	public DataStream<IN2> getSecondInput() {
		return inputStream2;
	}

	/**
	 * Gets the type of the first input.
	 *
	 * @return The type of the first input
	 */
	public TypeInformation<IN1> getType1() {
		return inputStream1.getType();
	}

	/**
	 * Gets the type of the second input.
	 *
	 * @return The type of the second input
	 */
	public TypeInformation<IN2> getType2() {
		return inputStream2.getType();
	}

	/**
	 * KeyBy operation for connected data stream. Assigns keys to the elements of
	 * input1 and input2 according to keyPosition1 and keyPosition2.
	 *
	 * @param keyPosition1
	 *            The field used to compute the hashcode of the elements in the
	 *            first input stream.
	 * @param keyPosition2
	 *            The field used to compute the hashcode of the elements in the
	 *            second input stream.
	 * @return The grouped {@link ConnectedStreams}
	 */
	public ConnectedStreams<IN1, IN2> keyBy(int keyPosition1, int keyPosition2) {
		return new ConnectedStreams<>(environment, inputStream1.keyBy(keyPosition1),
				inputStream2.keyBy(keyPosition2));
	}

	/**
	 * KeyBy operation for connected data stream. Assigns keys to the elements of
	 * input1 and input2 according to keyPositions1 and keyPositions2.
	 *
	 * @param keyPositions1
	 *            The fields used to group the first input stream.
	 * @param keyPositions2
	 *            The fields used to group the second input stream.
	 * @return The grouped {@link ConnectedStreams}
	 */
	public ConnectedStreams<IN1, IN2> keyBy(int[] keyPositions1, int[] keyPositions2) {
		return new ConnectedStreams<>(environment, inputStream1.keyBy(keyPositions1),
				inputStream2.keyBy(keyPositions2));
	}

	/**
	 * KeyBy operation for connected data stream using key expressions. Assigns keys to
	 * the elements of input1 and input2 according to field1 and field2. A field
	 * expression is either the name of a public field or a getter method with
	 * parentheses of the {@link DataStream}'s underlying type. A dot can be used
	 * to drill down into objects, as in {@code "field1.getInnerField2()" }.
	 *
	 * @param field1
	 *            The grouping expression for the first input
	 * @param field2
	 *            The grouping expression for the second input
	 * @return The grouped {@link ConnectedStreams}
	 */
	public ConnectedStreams<IN1, IN2> keyBy(String field1, String field2) {
		return new ConnectedStreams<>(environment, inputStream1.keyBy(field1),
				inputStream2.keyBy(field2));
	}

	/**
	 * KeyBy operation for connected data stream using key expressions. Assigns keys to
	 * the elements of input1 and input2 according to fields1 and fields2. A
	 * field expression is either the name of a public field or a getter method
	 * with parentheses of the {@link DataStream}'s underlying type. A dot can be
	 * used to drill down into objects, as in {@code "field1.getInnerField2()" }.
	 *
	 * @param fields1
	 *            The grouping expressions for the first input
	 * @param fields2
	 *            The grouping expressions for the second input
	 * @return The grouped {@link ConnectedStreams}
	 */
	public ConnectedStreams<IN1, IN2> keyBy(String[] fields1, String[] fields2) {
		return new ConnectedStreams<>(environment, inputStream1.keyBy(fields1),
				inputStream2.keyBy(fields2));
	}

	/**
	 * KeyBy operation for connected data stream. Assigns keys to the elements of
	 * input1 and input2 using keySelector1 and keySelector2.
	 *
	 * @param keySelector1
	 *            The {@link KeySelector} used for grouping the first input
	 * @param keySelector2
	 *            The {@link KeySelector} used for grouping the second input
	 * @return The partitioned {@link ConnectedStreams}
	 */
	public ConnectedStreams<IN1, IN2> keyBy(KeySelector<IN1, ?> keySelector1,
			KeySelector<IN2, ?> keySelector2) {
		return new ConnectedStreams<>(environment, inputStream1.keyBy(keySelector1),
				inputStream2.keyBy(keySelector2));
	}

	/**
	 * PartitionBy operation for connected data stream. Partitions the elements of
	 * input1 and input2 according to keyPosition1 and keyPosition2.
	 *
	 * @param keyPosition1
	 *            The field used to compute the hashcode of the elements in the
	 *            first input stream.
	 * @param keyPosition2
	 *            The field used to compute the hashcode of the elements in the
	 *            second input stream.
	 * @return The partitioned {@link ConnectedStreams}
	 */
	public ConnectedStreams<IN1, IN2> partitionByHash(int keyPosition1, int keyPosition2) {
		return new ConnectedStreams<>(environment, inputStream1.partitionByHash(keyPosition1),
				inputStream2.partitionByHash(keyPosition2));
	}

	/**
	 * PartitionBy operation for connected data stream. Partitions the elements of
	 * input1 and input2 according to keyPositions1 and keyPositions2.
	 *
	 * @param keyPositions1
	 *            The fields used to group the first input stream.
	 * @param keyPositions2
	 *            The fields used to group the second input stream.
	 * @return The partitioned {@link ConnectedStreams}
	 */
	public ConnectedStreams<IN1, IN2> partitionByHash(int[] keyPositions1, int[] keyPositions2) {
		return new ConnectedStreams<>(environment, inputStream1.partitionByHash(keyPositions1),
				inputStream2.partitionByHash(keyPositions2));
	}

	/**
	 * PartitionBy operation for connected data stream using key expressions. Partitions
	 * the elements of input1 and input2 according to field1 and field2. A
	 * field expression is either the name of a public field or a getter method
	 * with parentheses of the {@link DataStream}'s underlying type. A dot can be
	 * used to drill down into objects, as in {@code "field1.getInnerField2()" }.
	 *
	 * @param field1
	 *            The partitioning expression for the first input
	 * @param field2
	 *            The partitioning expression for the second input
	 * @return The partitioned {@link ConnectedStreams}
	 */
	public ConnectedStreams<IN1, IN2> partitionByHash(String field1, String field2) {
		return new ConnectedStreams<>(environment, inputStream1.partitionByHash(field1),
				inputStream2.partitionByHash(field2));
	}

	/**
	 * PartitionBy operation for connected data stream using key expressions. Partitions
	 * the elements of input1 and input2 according to fields1 and fields2. A
	 * field expression is either the name of a public field or a getter method
	 * with parentheses of the {@link DataStream}'s underlying type. A dot can be
	 * used to drill down into objects, as in {@code "field1.getInnerField2()" }.
	 *
	 * @param fields1
	 *            The partitioning expressions for the first input
	 * @param fields2
	 *            The partitioning expressions for the second input
	 * @return The partitioned {@link ConnectedStreams}
	 */
	public ConnectedStreams<IN1, IN2> partitionByHash(String[] fields1, String[] fields2) {
		return new ConnectedStreams<>(environment, inputStream1.partitionByHash(fields1),
				inputStream2.partitionByHash(fields2));
	}

	/**
	 * PartitionBy operation for connected data stream. Partitions the elements of
	 * input1 and input2 using keySelector1 and keySelector2.
	 *
	 * @param keySelector1
	 *            The {@link KeySelector} used for partitioning the first input
	 * @param keySelector2
	 *            The {@link KeySelector} used for partitioning the second input
	 * @return The partitioned {@link ConnectedStreams}
	 */
	public ConnectedStreams<IN1, IN2> partitionByHash(KeySelector<IN1, ?> keySelector1,
			KeySelector<IN2, ?> keySelector2) {
		return new ConnectedStreams<>(environment, inputStream1.partitionByHash(keySelector1),
				inputStream2.partitionByHash(keySelector2));
	}

	/**
	 * Applies a CoMap transformation on a {@link ConnectedStreams} and maps
	 * the output to a common type. The transformation calls a
	 * {@link CoMapFunction#map1} for each element of the first input and
	 * {@link CoMapFunction#map2} for each element of the second input. Each
	 * CoMapFunction call returns exactly one element.
	 *
	 * @param coMapper The CoMapFunction used to jointly transform the two input DataStreams
	 * @param <OUT> The common output type of the two map functions
	 * @return The transformed {@link DataStream}
	 */
	public <OUT> SingleOutputStreamOperator<OUT, ?> map(CoMapFunction<IN1, IN2, OUT> coMapper) {

		// Derive the output type from the user function and the two input types.
		TypeInformation<OUT> outTypeInfo = TypeExtractor.getBinaryOperatorReturnType(coMapper,
				CoMapFunction.class, false, true, getType1(), getType2(),
				Utils.getCallLocationName(), true);

		return transform("Co-Map", outTypeInfo, new CoStreamMap<>(inputStream1.clean(coMapper)));
	}

	/**
	 * Applies a CoFlatMap transformation on a {@link ConnectedStreams} and
	 * maps the output to a common type. The transformation calls a
	 * {@link CoFlatMapFunction#flatMap1} for each element of the first input
	 * and {@link CoFlatMapFunction#flatMap2} for each element of the second
	 * input. Each CoFlatMapFunction call returns any number of elements
	 * including none.
	 *
	 * @param coFlatMapper
	 *            The CoFlatMapFunction used to jointly transform the two input
	 *            DataStreams
	 * @param <OUT> The common output type of the two flat-map functions
	 * @return The transformed {@link DataStream}
	 */
	public <OUT> SingleOutputStreamOperator<OUT, ?> flatMap(
			CoFlatMapFunction<IN1, IN2, OUT> coFlatMapper) {

		// Derive the output type from the user function and the two input types.
		TypeInformation<OUT> outTypeInfo = TypeExtractor.getBinaryOperatorReturnType(coFlatMapper,
				CoFlatMapFunction.class, false, true, getType1(), getType2(),
				Utils.getCallLocationName(), true);

		return transform("Co-Flat Map", outTypeInfo, new CoStreamFlatMap<>(inputStream1.clean(coFlatMapper)));
	}

	/**
	 * Builds a {@link TwoInputTransformation} from the two inputs and the given operator,
	 * registers it with the execution environment and returns the resulting stream.
	 *
	 * @param functionName
	 *            The name given to the created transformation.
	 * @param outTypeInfo
	 *            The type information of the elements produced by the operator.
	 * @param operator
	 *            The two-input operator that processes the elements of both inputs.
	 * @param <OUT> The type of the elements produced by the operator
	 * @return The {@link SingleOutputStreamOperator} wrapping the new transformation
	 */
	public <OUT> SingleOutputStreamOperator<OUT, ?> transform(String functionName,
			TypeInformation<OUT> outTypeInfo,
			TwoInputStreamOperator<IN1, IN2, OUT> operator) {

		// read the output type of the input Transforms to coax out errors about MissingTypeInfo
		inputStream1.getType();
		inputStream2.getType();

		TwoInputTransformation<IN1, IN2, OUT> transform = new TwoInputTransformation<>(
				inputStream1.getTransformation(),
				inputStream2.getTransformation(),
				functionName,
				operator,
				outTypeInfo,
				environment.getParallelism());

		// The constructor is invoked raw, as before; the resulting unchecked
		// assignment is suppressed on this single declaration only.
		@SuppressWarnings({ "unchecked", "rawtypes" })
		SingleOutputStreamOperator<OUT, ?> returnStream = new SingleOutputStreamOperator(environment, transform);

		getExecutionEnvironment().addOperator(transform);

		return returnStream;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy