org.apache.flink.streaming.api.scala.ConnectedStreams.scala Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.api.scala
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.functions.KeySelector
import org.apache.flink.api.java.typeutils.{TypeExtractor, ResultTypeQueryable}
import org.apache.flink.streaming.api.datastream.{ConnectedStreams => JavaCStream, DataStream => JavaStream, KeyedStream => JKeyedStream}
import org.apache.flink.streaming.api.functions.co.{CoFlatMapFunction, CoMapFunction}
import org.apache.flink.util.Collector
import scala.reflect.ClassTag
/**
* [[ConnectedStreams]] represents two connected streams of (possible) different data types. It
* can be used to apply transformations such as [[CoMapFunction]] on two
* [[DataStream]]s.
*/
class ConnectedStreams[IN1, IN2](javaStream: JavaCStream[IN1, IN2]) {
/**
* Applies a CoMap transformation on a {@link ConnectedStreams} and maps
* the output to a common type. The transformation calls a
* @param fun1 for each element of the first input and
* @param fun2 for each element of the second input. Each
* CoMapFunction call returns exactly one element.
*
* The CoMapFunction used to jointly transform the two input
* DataStreams
* @return The transformed { @link DataStream}
*/
def map[R: TypeInformation: ClassTag](fun1: IN1 => R, fun2: IN2 => R):
DataStream[R] = {
if (fun1 == null || fun2 == null) {
throw new NullPointerException("Map function must not be null.")
}
val cleanFun1 = clean(fun1)
val cleanFun2 = clean(fun2)
val comapper = new CoMapFunction[IN1, IN2, R] {
def map1(in1: IN1): R = cleanFun1(in1)
def map2(in2: IN2): R = cleanFun2(in2)
}
map(comapper)
}
/**
* Applies a CoMap transformation on a {@link ConnectedStreams} and maps
* the output to a common type. The transformation calls a
* {@link CoMapFunction#map1} for each element of the first input and
* {@link CoMapFunction#map2} for each element of the second input. Each
* CoMapFunction call returns exactly one element. The user can also extend
* {@link RichCoMapFunction} to gain access to other features provided by
* the {@link RichFuntion} interface.
*
* @param coMapper
* The CoMapFunction used to jointly transform the two input
* DataStreams
* @return The transformed { @link DataStream}
*/
def map[R: TypeInformation: ClassTag](coMapper: CoMapFunction[IN1, IN2, R]):
DataStream[R] = {
if (coMapper == null) {
throw new NullPointerException("Map function must not be null.")
}
val outType : TypeInformation[R] = implicitly[TypeInformation[R]]
javaStream.map(coMapper).returns(outType).asInstanceOf[JavaStream[R]]
}
/**
* Applies a CoFlatMap transformation on a {@link ConnectedStreams} and
* maps the output to a common type. The transformation calls a
* {@link CoFlatMapFunction#flatMap1} for each element of the first input
* and {@link CoFlatMapFunction#flatMap2} for each element of the second
* input. Each CoFlatMapFunction call returns any number of elements
* including none. The user can also extend {@link RichFlatMapFunction} to
* gain access to other features provided by the {@link RichFuntion}
* interface.
*
* @param coFlatMapper
* The CoFlatMapFunction used to jointly transform the two input
* DataStreams
* @return The transformed { @link DataStream}
*/
def flatMap[R: TypeInformation: ClassTag](coFlatMapper: CoFlatMapFunction[IN1, IN2, R]):
DataStream[R] = {
if (coFlatMapper == null) {
throw new NullPointerException("FlatMap function must not be null.")
}
val outType : TypeInformation[R] = implicitly[TypeInformation[R]]
javaStream.flatMap(coFlatMapper).returns(outType).asInstanceOf[JavaStream[R]]
}
/**
* Applies a CoFlatMap transformation on a {@link ConnectedStreams} and
* maps the output to a common type. The transformation calls a
* @param fun1 for each element of the first input
* and @param fun2 for each element of the second
* input. Each CoFlatMapFunction call returns any number of elements
* including none.
*
* @return The transformed { @link DataStream}
*/
def flatMap[R: TypeInformation: ClassTag](fun1: (IN1, Collector[R]) => Unit,
fun2: (IN2, Collector[R]) => Unit): DataStream[R] = {
if (fun1 == null || fun2 == null) {
throw new NullPointerException("FlatMap functions must not be null.")
}
val cleanFun1 = clean(fun1)
val cleanFun2 = clean(fun2)
val flatMapper = new CoFlatMapFunction[IN1, IN2, R] {
def flatMap1(value: IN1, out: Collector[R]): Unit = cleanFun1(value, out)
def flatMap2(value: IN2, out: Collector[R]): Unit = cleanFun2(value, out)
}
flatMap(flatMapper)
}
/**
* Applies a CoFlatMap transformation on a {@link ConnectedStreams} and
* maps the output to a common type. The transformation calls a
* @param fun1 for each element of the first input
* and @param fun2 for each element of the second
* input. Each CoFlatMapFunction call returns any number of elements
* including none.
*
* @return The transformed { @link DataStream}
*/
def flatMap[R: TypeInformation: ClassTag](fun1: IN1 => TraversableOnce[R],
fun2: IN2 => TraversableOnce[R]): DataStream[R] = {
if (fun1 == null || fun2 == null) {
throw new NullPointerException("FlatMap functions must not be null.")
}
val cleanFun1 = clean(fun1)
val cleanFun2 = clean(fun2)
val flatMapper = new CoFlatMapFunction[IN1, IN2, R] {
def flatMap1(value: IN1, out: Collector[R]) = { cleanFun1(value) foreach out.collect }
def flatMap2(value: IN2, out: Collector[R]) = { cleanFun2(value) foreach out.collect }
}
flatMap(flatMapper)
}
/**
* GroupBy operation for connected data stream. Groups the elements of
* input1 and input2 according to keyPosition1 and keyPosition2. Used for
* applying function on grouped data streams for example
* {@link ConnectedStreams#reduce}
*
* @param keyPosition1
* The field used to compute the hashcode of the elements in the
* first input stream.
* @param keyPosition2
* The field used to compute the hashcode of the elements in the
* second input stream.
* @return @return The transformed { @link ConnectedStreams}
*/
def keyBy(keyPosition1: Int, keyPosition2: Int): ConnectedStreams[IN1, IN2] = {
javaStream.keyBy(keyPosition1, keyPosition2)
}
/**
* GroupBy operation for connected data stream. Groups the elements of
* input1 and input2 according to keyPositions1 and keyPositions2. Used for
* applying function on grouped data streams for example
* {@link ConnectedStreams#reduce}
*
* @param keyPositions1
* The fields used to group the first input stream.
* @param keyPositions2
* The fields used to group the second input stream.
* @return @return The transformed { @link ConnectedStreams}
*/
def keyBy(keyPositions1: Array[Int], keyPositions2: Array[Int]):
ConnectedStreams[IN1, IN2] = {
javaStream.keyBy(keyPositions1, keyPositions2)
}
/**
* GroupBy operation for connected data stream using key expressions. Groups
* the elements of input1 and input2 according to field1 and field2. A field
* expression is either the name of a public field or a getter method with
* parentheses of the {@link DataStream}S underlying type. A dot can be used
* to drill down into objects, as in {@code "field1.getInnerField2()" }.
*
* @param field1
* The grouping expression for the first input
* @param field2
* The grouping expression for the second input
* @return The grouped { @link ConnectedStreams}
*/
def keyBy(field1: String, field2: String): ConnectedStreams[IN1, IN2] = {
javaStream.keyBy(field1, field2)
}
/**
* GroupBy operation for connected data stream using key expressions. Groups
* the elements of input1 and input2 according to fields1 and fields2. A
* field expression is either the name of a public field or a getter method
* with parentheses of the {@link DataStream}S underlying type. A dot can be
* used to drill down into objects, as in {@code "field1.getInnerField2()" }
* .
*
* @param fields1
* The grouping expressions for the first input
* @param fields2
* The grouping expressions for the second input
* @return The grouped { @link ConnectedStreams}
*/
def keyBy(fields1: Array[String], fields2: Array[String]):
ConnectedStreams[IN1, IN2] = {
javaStream.keyBy(fields1, fields2)
}
/**
* GroupBy operation for connected data stream. Groups the elements of
* input1 and input2 using fun1 and fun2. Used for applying
* function on grouped data streams for example
* {@link ConnectedStreams#reduce}
*
* @param fun1
* The function used for grouping the first input
* @param fun2
* The function used for grouping the second input
* @return The grouped { @link ConnectedStreams}
*/
def keyBy[K1: TypeInformation, K2: TypeInformation](fun1: IN1 => K1, fun2: IN2 => K2):
ConnectedStreams[IN1, IN2] = {
val keyType1 = implicitly[TypeInformation[K1]]
val keyType2 = implicitly[TypeInformation[K2]]
val cleanFun1 = clean(fun1)
val cleanFun2 = clean(fun2)
val keyExtractor1 = new KeySelectorWithType[IN1, K1](cleanFun1, keyType1)
val keyExtractor2 = new KeySelectorWithType[IN2, K2](cleanFun2, keyType2)
javaStream.keyBy(keyExtractor1, keyExtractor2)
}
/**
* PartitionBy operation for connected data stream. Partitions the elements of
* input1 and input2 according to keyPosition1 and keyPosition2.
*
* @param keyPosition1
* The field used to compute the hashcode of the elements in the
* first input stream.
* @param keyPosition2
* The field used to compute the hashcode of the elements in the
* second input stream.
* @return The transformed { @link ConnectedStreams}
*/
def partitionByHash(keyPosition1: Int, keyPosition2: Int): ConnectedStreams[IN1, IN2] = {
javaStream.partitionByHash(keyPosition1, keyPosition2)
}
/**
* PartitionBy operation for connected data stream. Partitions the elements of
* input1 and input2 according to keyPositions1 and keyPositions2.
*
* @param keyPositions1
* The fields used to partition the first input stream.
* @param keyPositions2
* The fields used to partition the second input stream.
* @return The transformed { @link ConnectedStreams}
*/
def partitionByHash(keyPositions1: Array[Int], keyPositions2: Array[Int]):
ConnectedStreams[IN1, IN2] = {
javaStream.partitionByHash(keyPositions1, keyPositions2)
}
/**
* PartitionBy operation for connected data stream using key expressions. Partitions
* the elements of input1 and input2 according to field1 and field2. A field
* expression is either the name of a public field or a getter method with
* parentheses of the {@link DataStream}S underlying type. A dot can be used
* to drill down into objects, as in {@code "field1.getInnerField2()" }.
*
* @param field1
* The partitioning expression for the first input
* @param field2
* The partitioning expression for the second input
* @return The grouped { @link ConnectedStreams}
*/
def partitionByHash(field1: String, field2: String): ConnectedStreams[IN1, IN2] = {
javaStream.partitionByHash(field1, field2)
}
/**
* PartitionBy operation for connected data stream using key expressions. Partitions
* the elements of input1 and input2 according to fields1 and fields2.
*
* @param fields1
* The partitioning expressions for the first input
* @param fields2
* The partitioning expressions for the second input
* @return The partitioned { @link ConnectedStreams}
*/
def partitionByHash(fields1: Array[String], fields2: Array[String]):
ConnectedStreams[IN1, IN2] = {
javaStream.partitionByHash(fields1, fields2)
}
/**
* PartitionBy operation for connected data stream. Partitions the elements of
* input1 and input2 using fun1 and fun2.
*
* @param fun1
* The function used for partitioning the first input
* @param fun2
* The function used for partitioning the second input
* @return The partitioned { @link ConnectedStreams}
*/
def partitionByHash[K: TypeInformation, L: TypeInformation](fun1: IN1 => K, fun2: IN2 => L):
ConnectedStreams[IN1, IN2] = {
val cleanFun1 = clean(fun1)
val cleanFun2 = clean(fun2)
val keyExtractor1 = new KeySelector[IN1, K] {
def getKey(in: IN1) = cleanFun1(in)
}
val keyExtractor2 = new KeySelector[IN2, L] {
def getKey(in: IN2) = cleanFun2(in)
}
javaStream.partitionByHash(keyExtractor1, keyExtractor2)
}
/**
* Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning
* is not disabled in the {@link org.apache.flink.api.common.ExecutionConfig}
*/
private[flink] def clean[F <: AnyRef](f: F): F = {
new StreamExecutionEnvironment(javaStream.getExecutionEnvironment).scalaClean(f)
}
}
class KeySelectorWithType[IN, K](
private[this] val fun: IN => K,
private[this] val info: TypeInformation[K])
extends KeySelector[IN, K] with ResultTypeQueryable[K] {
override def getKey(value: IN): K = fun(value)
override def getProducedType: TypeInformation[K] = info
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy