// All downloads are free. Search and download functionality uses the official Maven repository.

// org.apache.flinkx.api.DataStreamUtils.scala (Maven / Gradle / Ivy)

package org.apache.flinkx.api

import org.apache.flink.annotation.Experimental
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.functions.KeySelector
import org.apache.flink.streaming.api.datastream.{DataStreamUtils => JavaStreamUtils}

import scala.jdk.CollectionConverters._
import scala.reflect.ClassTag
import ScalaStreamOps._

/** Enrichment wrapper that adds result collection and key-reinterpretation helpers to a [[DataStream]], delegating
  * to Flink's Java-side stream API (including the Java `DataStreamUtils`, imported here as `JavaStreamUtils`).
  *
  * This experimental class is relocated from flink-streaming-contrib.
  *
  * @tparam T
  *   Element type of the wrapped stream.
  * @param self
  *   The DataStream being enriched.
  */
@Experimental
class DataStreamUtils[T: TypeInformation: ClassTag](val self: DataStream[T]) {

  /** Calls `executeAndCollect()` on the underlying Java stream and exposes the resulting Java iterator as a Scala
    * [[Iterator]].
    *
    * NOTE(review): Flink documents `executeAndCollect()` as returning a closeable iterator; the Scala view returned
    * here offers no way to close it. Confirm whether callers are expected to drain it completely.
    *
    * @return
    *   A Scala iterator over the elements of the DataStream.
    */
  def collect(): Iterator[T] = {
    val javaResults = self.javaStream.executeAndCollect()
    javaResults.asScala
  }

  /** Treats the wrapped [[DataStream]] as if it were already a [[KeyedStream]] keyed by `keySelector`, by delegating
    * to the Java `DataStreamUtils.reinterpretAsKeyedStream`.
    *
    * IMPORTANT: For every partition of the base stream, the keys of events in the base stream must be partitioned
    * exactly in the same way as if it was created through a [[DataStream#keyBy(KeySelector)]].
    *
    * @tparam K
    *   Key type; its [[TypeInformation]] is resolved implicitly and handed to the Java API.
    * @param keySelector
    *   Function that extracts the key from each element of the stream.
    * @return
    *   The same stream viewed as a [[KeyedStream]].
    */
  def reinterpretAsKeyedStream[K: TypeInformation](keySelector: T => K): KeyedStream[T, K] = {
    val keyType = implicitly[TypeInformation[K]]
    // Run the user function through `clean` before wrapping it for the Java API.
    val selector = new JavaKeySelector[T, K](clean(keySelector))
    val javaKeyed = JavaStreamUtils.reinterpretAsKeyedStream(self.javaStream, selector, keyType)
    asScalaStream(javaKeyed)
  }

  /** Passes `f` through `scalaClean` of a [[StreamExecutionEnvironment]] wrapper built around the Java execution
    * environment of the wrapped stream.
    *
    * NOTE(review): presumably this is Flink's closure cleaning (making the function safe to serialize) — confirm
    * against `StreamExecutionEnvironment.scalaClean`.
    *
    * @param f
    *   The function object to clean.
    * @return
    *   Whatever `scalaClean` returns for `f`.
    */
  private[flinkx] def clean[F <: AnyRef](f: F): F = {
    val scalaEnv = new StreamExecutionEnvironment(self.javaStream.getExecutionEnvironment)
    scalaEnv.scalaClean(f)
  }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy