org.apache.flink.streaming.api.scala.DataStream.scala

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.scala

import org.apache.flink.api.common.functions.{FilterFunction, FlatMapFunction, MapFunction, Partitioner, RichFilterFunction, RichFlatMapFunction, RichMapFunction}
import org.apache.flink.api.common.io.OutputFormat
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.functions.KeySelector
import org.apache.flink.api.java.tuple.{Tuple => JavaTuple}
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.api.scala.operators.ScalaCsvOutputFormat
import org.apache.flink.core.fs.{FileSystem, Path}
import org.apache.flink.streaming.api.collector.selector.OutputSelector
import org.apache.flink.streaming.api.datastream.{AllWindowedStream => JavaAllWindowedStream, DataStream => JavaStream, KeyedStream => JavaKeyedStream, _}
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.flink.streaming.api.functions.{AscendingTimestampExtractor, TimestampExtractor}
import org.apache.flink.streaming.api.scala.function.StatefulFunction
import org.apache.flink.streaming.api.windowing.assigners._
import org.apache.flink.streaming.api.windowing.time.AbstractTime
import org.apache.flink.streaming.api.windowing.windows.{GlobalWindow, TimeWindow, Window}
import org.apache.flink.streaming.util.serialization.SerializationSchema
import org.apache.flink.util.Collector

import scala.collection.JavaConverters._
import scala.reflect.ClassTag

class DataStream[T](javaStream: JavaStream[T]) {

  /**
   * Gets the underlying java DataStream object.
   */
  def getJavaStream: JavaStream[T] = javaStream

  /**
   * Returns the ID of the DataStream.
   *
   * @return ID of the DataStream
   */
  def getId = javaStream.getId

  /**
   * Returns the TypeInformation for the elements of this DataStream.
   */
  def getType(): TypeInformation[T] = javaStream.getType()

  /**
   * Sets the parallelism of this operation. This must be at least 1.
   */
  def setParallelism(parallelism: Int): DataStream[T] = {
    javaStream match {
      case ds: SingleOutputStreamOperator[_, _] => ds.setParallelism(parallelism)
      case _ =>
        throw new UnsupportedOperationException(
          "Operator " + javaStream.toString + " cannot have parallelism.")
    }
    this
  }

  /**
   * Returns the parallelism of this operation.
   */
  def getParallelism = javaStream.getParallelism
  
  /**
   * Returns the execution config.
   */
  def getExecutionConfig = javaStream.getExecutionConfig

  /**
   * Gets the name of the current data stream. This name is
   * used by the visualization and logging during runtime.
   *
   * @return Name of the stream.
   */
  def getName : String = javaStream match {
    case stream : SingleOutputStreamOperator[T,_] => stream.getName
    case _ => throw new UnsupportedOperationException("Only supported for operators.")
  }

  /**
   * Sets the name of the current data stream. This name is
   * used by the visualization and logging during runtime.
   *
   * @return The named operator
   */
  def name(name: String) : DataStream[T] = {
    javaStream match {
      case stream : SingleOutputStreamOperator[T, _] => stream.name(name)
      case _ => throw new UnsupportedOperationException("Only supported for operators.")
    }
    this
  }
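  // Illustrative sketch (not part of the original source): operator-level settings such as
  // parallelism and name are typically chained right after a transformation. `stream` is a
  // hypothetical DataStream[String]; the example assumes the implicit TypeInformation from
  // `org.apache.flink.streaming.api.scala._` is in scope.
  //
  //   val renamed: DataStream[String] = stream
  //     .map(_.trim)
  //     .name("trimmed-input")
  //     .setParallelism(4)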
  
  /**
   * Turns off chaining for this operator so thread co-location will not be
   * used as an optimization.
   *
   * Chaining can be turned off for the whole job by
   * [[StreamExecutionEnvironment.disableOperatorChaining()]]; however, this is
   * not advised for performance reasons.
   */
  def disableChaining(): DataStream[T] = {
    javaStream match {
      case ds: SingleOutputStreamOperator[_, _] => ds.disableChaining()
      case _ =>
        throw new UnsupportedOperationException("Only supported for operators.")
    }
    this
  }

  /**
   * Starts a new task chain beginning at this operator. This operator will
   * not be chained (thread co-located for increased performance) to any
   * previous tasks even if possible.
   */
  def startNewChain(): DataStream[T] = {
    javaStream match {
      case ds: SingleOutputStreamOperator[_, _] => ds.startNewChain()
      case _ =>
        throw new UnsupportedOperationException("Only supported for operators.")
    }
    this
  }

  /**
   * Isolates the operator in its own resource group. This will cause the
   * operator to grab as many task slots as its degree of parallelism. If
   * there are no free resources available, the job will fail to start.
   * All subsequent operators are assigned to the default resource group.
   */
  def isolateResources(): DataStream[T] = {
    javaStream match {
      case ds: SingleOutputStreamOperator[_, _] => ds.isolateResources()
      case _ =>
        throw new UnsupportedOperationException("Only supported for operators.")
    }
    this
  }

  /**
   * By default all operators in a streaming job share the same resource
   * group. Each resource group takes as many task manager slots as the
   * maximum parallelism operator in that group. By calling this method, this
   * operator starts a new resource group and all subsequent operators will
   * be added to this group unless specified otherwise. Please note that
   * local executions have by default as many available task slots as the
   * environment parallelism, so in order to start a new resource group the
   * degree of parallelism for the operators must be decreased from the
   * default.
   */
  def startNewResourceGroup(): DataStream[T] = {
    javaStream match {
      case ds: SingleOutputStreamOperator[_, _] => ds.startNewResourceGroup()
      case _ =>
        throw new UnsupportedOperationException("Only supported for operators.")
    }
    this
  }

  /**
   * Sets the maximum time frequency (ms) for the flushing of the output
   * buffer. By default the output buffers flush only when they are full.
   *
   * @param timeoutMillis The maximum time between two output flushes.
   * @return The operator with buffer timeout set.
   */
  def setBufferTimeout(timeoutMillis: Long): DataStream[T] = {
    javaStream match {
      case ds: SingleOutputStreamOperator[_, _] => ds.setBufferTimeout(timeoutMillis)
      case _ =>
        throw new UnsupportedOperationException("Only supported for operators.")
    }
    this
  }

  /**
   * Creates a new DataStream by merging DataStream outputs of
   * the same type with each other. The DataStreams merged using this operator
   * will be transformed simultaneously.
   */
  def union(dataStreams: DataStream[T]*): DataStream[T] =
    javaStream.union(dataStreams.map(_.getJavaStream): _*)

  /**
   * Creates a new ConnectedStreams by connecting
   * DataStream outputs of different type with each other. The
   * DataStreams connected using this operator can be used with CoFunctions.
   */
  def connect[T2](dataStream: DataStream[T2]): ConnectedStreams[T, T2] =
    javaStream.connect(dataStream.getJavaStream)
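  // Illustrative sketch (not part of the original source): how `union` and `connect` are
  // typically used. The stream names (`temperatures`, `pressures`, `controlMessages`) are
  // hypothetical.
  //
  //   val merged: DataStream[Double] = temperatures.union(pressures)
  //   val connected: ConnectedStreams[Double, String] = merged.connect(controlMessages)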
  /**
   * Groups the elements of a DataStream by the given key positions (for tuple/array types) to
   * be used with grouped operators like grouped reduce or grouped aggregations.
   */
  def keyBy(fields: Int*): KeyedStream[T, JavaTuple] = javaStream.keyBy(fields: _*)

  /**
   * Groups the elements of a DataStream by the given field expressions to
   * be used with grouped operators like grouped reduce or grouped aggregations.
   */
  def keyBy(firstField: String, otherFields: String*): KeyedStream[T, JavaTuple] =
    javaStream.keyBy(firstField +: otherFields.toArray: _*)

  /**
   * Groups the elements of a DataStream by the given K key to
   * be used with grouped operators like grouped reduce or grouped aggregations.
   */
  def keyBy[K: TypeInformation](fun: T => K): KeyedStream[T, K] = {
    val cleanFun = clean(fun)
    val keyType: TypeInformation[K] = implicitly[TypeInformation[K]]

    val keyExtractor = new KeySelector[T, K] with ResultTypeQueryable[K] {
      def getKey(in: T) = cleanFun(in)
      override def getProducedType: TypeInformation[K] = keyType
    }
    new JavaKeyedStream(javaStream, keyExtractor, keyType)
  }

  /**
   * Partitions the elements of a DataStream by the given key positions (for tuple/array types)
   * to be used with grouped operators like grouped reduce or grouped aggregations.
   */
  def partitionByHash(fields: Int*): DataStream[T] = javaStream.partitionByHash(fields: _*)

  /**
   * Partitions the elements of a DataStream by the given field expressions to
   * be used with grouped operators like grouped reduce or grouped aggregations.
   */
  def partitionByHash(firstField: String, otherFields: String*): DataStream[T] =
    javaStream.partitionByHash(firstField +: otherFields.toArray: _*)

  /**
   * Partitions the elements of a DataStream by the given K key to
   * be used with grouped operators like grouped reduce or grouped aggregations.
   */
  def partitionByHash[K: TypeInformation](fun: T => K): DataStream[T] = {
    val cleanFun = clean(fun)

    val keyExtractor = new KeySelector[T, K] with ResultTypeQueryable[K] {
      def getKey(in: T) = cleanFun(in)
      override def getProducedType: TypeInformation[K] = implicitly[TypeInformation[K]]
    }
    javaStream.partitionByHash(keyExtractor)
  }

  /**
   * Partitions a tuple DataStream on the specified key fields using a custom partitioner.
   * This method takes the key position to partition on, and a partitioner that accepts the key
   * type.
   *
   * Note: This method works only on single field keys.
   */
  def partitionCustom[K: TypeInformation](partitioner: Partitioner[K], field: Int)
      : DataStream[T] =
    javaStream.partitionCustom(partitioner, field)

  /**
   * Partitions a POJO DataStream on the specified key fields using a custom partitioner.
   * This method takes the key expression to partition on, and a partitioner that accepts the key
   * type.
   *
   * Note: This method works only on single field keys.
   */
  def partitionCustom[K: TypeInformation](partitioner: Partitioner[K], field: String)
      : DataStream[T] =
    javaStream.partitionCustom(partitioner, field)

  /**
   * Partitions a DataStream on the key returned by the selector, using a custom partitioner.
   * This method takes the key selector to get the key to partition on, and a partitioner that
   * accepts the key type.
   *
   * Note: This method works only on single field keys, i.e. the selector cannot return tuples
   * of fields.
   */
  def partitionCustom[K: TypeInformation](partitioner: Partitioner[K], fun: T => K)
      : DataStream[T] = {
    val cleanFun = clean(fun)

    val keyExtractor = new KeySelector[T, K] with ResultTypeQueryable[K] {
      def getKey(in: T) = cleanFun(in)
      override def getProducedType: TypeInformation[K] = implicitly[TypeInformation[K]]
    }
    javaStream.partitionCustom(partitioner, keyExtractor)
  }

  /**
   * Sets the partitioning of the DataStream so that the output tuples
   * are broadcast to every parallel instance of the next component. This
   * setting only affects how the outputs will be distributed between the
   * parallel instances of the next processing operator.
   */
  def broadcast: DataStream[T] = javaStream.broadcast()

  /**
   * Sets the partitioning of the DataStream so that the output values all go to
   * the first instance of the next processing operator. Use this setting with care
   * since it might cause a serious performance bottleneck in the application.
   */
  def global: DataStream[T] = javaStream.global()

  /**
   * Sets the partitioning of the DataStream so that the output tuples
   * are shuffled to the next component. This setting only affects how the
   * outputs will be distributed between the parallel instances of the next
   * processing operator.
   */
  def shuffle: DataStream[T] = javaStream.shuffle()

  /**
   * Sets the partitioning of the DataStream so that the output tuples
   * are forwarded to the local subtask of the next component (whenever
   * possible). This is the default partitioner setting. This setting only
   * affects how the outputs will be distributed between the parallel
   * instances of the next processing operator.
   */
  def forward: DataStream[T] = javaStream.forward()

  /**
   * Sets the partitioning of the DataStream so that the output tuples
   * are distributed evenly to the next component. This setting only affects
   * how the outputs will be distributed between the parallel instances of
   * the next processing operator.
   */
  def rebalance: DataStream[T] = javaStream.rebalance()
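  // Illustrative sketch (not part of the original source): keying a stream of (word, count)
  // pairs either by tuple position or by a key-extracting function. `wordCounts` is a
  // hypothetical DataStream[(String, Int)].
  //
  //   val byPosition = wordCounts.keyBy(0)
  //   val byFunction: KeyedStream[(String, Int), String] = wordCounts.keyBy(_._1)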

  /**
   * Initiates an iterative part of the program that creates a loop by feeding
   * back data streams. To create a streaming iteration the user needs to define
   * a transformation that creates two DataStreams. The first one is the output
   * that will be fed back to the start of the iteration and the second is the output
   * stream of the iterative part.
   *
   * stepfunction: initialStream => (feedback, output)
   *
   * A common pattern is to use output splitting to create the feedback and output DataStreams.
   * Please refer to the .split(...) method of the DataStream.
   *
   * By default a DataStream with iteration will never terminate, but the user
   * can use the maxWaitTime parameter to set a max waiting time for the iteration head.
   * If no data is received within the set time, the stream terminates.
   *
   * By default the feedback partitioning is set to match the input. To override this,
   * set the keepPartitioning flag to true.
   */
  def iterate[R](
      stepFunction: DataStream[T] => (DataStream[T], DataStream[R]),
      maxWaitTimeMillis: Long = 0,
      keepPartitioning: Boolean = false): DataStream[R] = {
    val iterativeStream = javaStream.iterate(maxWaitTimeMillis)

    val (feedback, output) = stepFunction(new DataStream[T](iterativeStream))
    iterativeStream.closeWith(feedback.getJavaStream)
    output
  }
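  // Illustrative sketch (not part of the original source): an iteration whose step function
  // decrements positive numbers, feeds values still greater than zero back into the loop, and
  // emits the rest. `numbers` is a hypothetical DataStream[Long].
  //
  //   val result: DataStream[Long] = numbers.iterate { input =>
  //     val decremented = input.map(_ - 1)
  //     val feedback    = decremented.filter(_ > 0)
  //     val output      = decremented.filter(_ <= 0)
  //     (feedback, output)
  //   }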

  /**
   * Initiates an iterative part of the program that creates a loop by feeding
   * back data streams. To create a streaming iteration the user needs to define
   * a transformation that creates two DataStreams. The first one is the output
   * that will be fed back to the start of the iteration and the second is the output
   * stream of the iterative part.
   *
   * The input stream of the iterate operator and the feedback stream will be treated
   * as a ConnectedStreams where the input is connected with the feedback stream.
   *
   * This allows the user to distinguish standard input from feedback inputs.
   *
   * stepfunction: initialStream => (feedback, output)
   *
   * The user must set the max waiting time for the iteration head.
   * If no data is received within the set time, the stream terminates. If this parameter is set
   * to 0 then the iteration sources will wait indefinitely, so the job must be killed to stop.
   */
  def iterate[R, F: TypeInformation: ClassTag](
      stepFunction: ConnectedStreams[T, F] => (DataStream[F], DataStream[R]),
      maxWaitTimeMillis: Long): DataStream[R] = {
    val feedbackType: TypeInformation[F] = implicitly[TypeInformation[F]]

    val connectedIterativeStream = javaStream.iterate(maxWaitTimeMillis).
      withFeedbackType(feedbackType)

    val (feedback, output) = stepFunction(connectedIterativeStream)
    connectedIterativeStream.closeWith(feedback.getJavaStream)
    output
  }

  /**
   * Creates a new DataStream by applying the given function to every element of this DataStream.
   */
  def map[R: TypeInformation: ClassTag](fun: T => R): DataStream[R] = {
    if (fun == null) {
      throw new NullPointerException("Map function must not be null.")
    }
    val cleanFun = clean(fun)
    val mapper = new MapFunction[T, R] {
      def map(in: T): R = cleanFun(in)
    }
    map(mapper)
  }

  /**
   * Creates a new DataStream by applying the given function to every element of this DataStream.
   */
  def map[R: TypeInformation: ClassTag](mapper: MapFunction[T, R]): DataStream[R] = {
    if (mapper == null) {
      throw new NullPointerException("Map function must not be null.")
    }

    val outType: TypeInformation[R] = implicitly[TypeInformation[R]]
    javaStream.map(mapper).returns(outType).asInstanceOf[JavaStream[R]]
  }

  /**
   * Creates a new DataStream by applying the given function to every element and flattening
   * the results.
   */
  def flatMap[R: TypeInformation: ClassTag](flatMapper: FlatMapFunction[T, R]): DataStream[R] = {
    if (flatMapper == null) {
      throw new NullPointerException("FlatMap function must not be null.")
    }

    val outType: TypeInformation[R] = implicitly[TypeInformation[R]]
    javaStream.flatMap(flatMapper).returns(outType).asInstanceOf[JavaStream[R]]
  }

  /**
   * Creates a new DataStream by applying the given function to every element and flattening
   * the results.
   */
  def flatMap[R: TypeInformation: ClassTag](fun: (T, Collector[R]) => Unit): DataStream[R] = {
    if (fun == null) {
      throw new NullPointerException("FlatMap function must not be null.")
    }
    val cleanFun = clean(fun)
    val flatMapper = new FlatMapFunction[T, R] {
      def flatMap(in: T, out: Collector[R]) { cleanFun(in, out) }
    }
    flatMap(flatMapper)
  }

  /**
   * Creates a new DataStream by applying the given function to every element and flattening
   * the results.
   */
  def flatMap[R: TypeInformation: ClassTag](fun: T => TraversableOnce[R]): DataStream[R] = {
    if (fun == null) {
      throw new NullPointerException("FlatMap function must not be null.")
    }
    val cleanFun = clean(fun)
    val flatMapper = new FlatMapFunction[T, R] {
      def flatMap(in: T, out: Collector[R]) { cleanFun(in) foreach out.collect }
    }
    flatMap(flatMapper)
  }

  /**
   * Creates a new DataStream that contains only the elements satisfying the given filter
   * predicate.
   */
  def filter(filter: FilterFunction[T]): DataStream[T] = {
    if (filter == null) {
      throw new NullPointerException("Filter function must not be null.")
    }
    javaStream.filter(filter)
  }

  /**
   * Creates a new DataStream that contains only the elements satisfying the given filter
   * predicate.
   */
  def filter(fun: T => Boolean): DataStream[T] = {
    if (fun == null) {
      throw new NullPointerException("Filter function must not be null.")
    }
    val cleanFun = clean(fun)
    val filter = new FilterFunction[T] {
      def filter(in: T) = cleanFun(in)
    }
    this.filter(filter)
  }
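  // Illustrative sketch (not part of the original source): chaining the element-wise
  // transformations above. `lines` is a hypothetical DataStream[String]; the implicit
  // TypeInformation from `org.apache.flink.streaming.api.scala._` is assumed to be in scope.
  //
  //   val words: DataStream[String] = lines
  //     .flatMap(_.toLowerCase.split("\\W+").filter(_.nonEmpty))
  //     .map(word => word.capitalize)
  //     .filter(_.length > 1)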
  /**
   * Windows this DataStream into tumbling time windows.
   *
   * This is a shortcut for either `.window(TumblingTimeWindows.of(size))` or
   * `.window(TumblingProcessingTimeWindows.of(size))` depending on the time characteristic
   * set using [[StreamExecutionEnvironment.setStreamTimeCharacteristic]].
   *
   * Note: This operation can be inherently non-parallel since all elements have to pass through
   * the same operator instance. (Only for special cases, such as aligned time windows is
   * it possible to perform this operation in parallel).
   *
   * @param size The size of the window.
   */
  def timeWindowAll(size: AbstractTime): AllWindowedStream[T, TimeWindow] = {
    val assigner = TumblingTimeWindows.of(size).asInstanceOf[WindowAssigner[T, TimeWindow]]
    windowAll(assigner)
  }

  /**
   * Windows this DataStream into sliding time windows.
   *
   * This is a shortcut for either `.window(SlidingTimeWindows.of(size, slide))` or
   * `.window(SlidingProcessingTimeWindows.of(size, slide))` depending on the time characteristic
   * set using [[StreamExecutionEnvironment.setStreamTimeCharacteristic]].
   *
   * Note: This operation can be inherently non-parallel since all elements have to pass through
   * the same operator instance. (Only for special cases, such as aligned time windows is
   * it possible to perform this operation in parallel).
   *
   * @param size The size of the window.
   */
  def timeWindowAll(size: AbstractTime, slide: AbstractTime): AllWindowedStream[T, TimeWindow] = {
    val assigner = SlidingTimeWindows.of(size, slide).asInstanceOf[WindowAssigner[T, TimeWindow]]
    windowAll(assigner)
  }

  /**
   * Windows this [[DataStream]] into sliding count windows.
   *
   * Note: This operation can be inherently non-parallel since all elements have to pass through
   * the same operator instance. (Only for special cases, such as aligned time windows is
   * it possible to perform this operation in parallel).
   *
   * @param size The size of the windows in number of elements.
   * @param slide The slide interval in number of elements.
   */
  def countWindowAll(size: Long, slide: Long): AllWindowedStream[T, GlobalWindow] = {
    new AllWindowedStream(javaStream.countWindowAll(size, slide))
  }

  /**
   * Windows this [[DataStream]] into tumbling count windows.
   *
   * Note: This operation can be inherently non-parallel since all elements have to pass through
   * the same operator instance. (Only for special cases, such as aligned time windows is
   * it possible to perform this operation in parallel).
   *
   * @param size The size of the windows in number of elements.
   */
  def countWindowAll(size: Long): AllWindowedStream[T, GlobalWindow] = {
    new AllWindowedStream(javaStream.countWindowAll(size))
  }
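  // Illustrative sketch (not part of the original source): a non-parallel count window over the
  // whole stream, summing the second tuple field every 100 elements. `wordCounts` is a
  // hypothetical DataStream[(String, Int)], and the aggregation on AllWindowedStream is assumed
  // to be available as `sum(position)`.
  //
  //   val sums: DataStream[(String, Int)] = wordCounts.countWindowAll(100).sum(1)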
  /**
   * Windows this data stream to an [[AllWindowedStream]], which evaluates windows
   * over a non key grouped stream. Elements are put into windows by a [[WindowAssigner]]. The
   * grouping of elements is done by window.
   *
   * A [[org.apache.flink.streaming.api.windowing.triggers.Trigger]] can be defined to specify
   * when windows are evaluated. However, `WindowAssigner`s have a default `Trigger`
   * that is used if a `Trigger` is not specified.
   *
   * Note: This operation can be inherently non-parallel since all elements have to pass through
   * the same operator instance. (Only for special cases, such as aligned time windows is
   * it possible to perform this operation in parallel).
   *
   * @param assigner The `WindowAssigner` that assigns elements to windows.
   * @return The trigger windows data stream.
   */
  def windowAll[W <: Window](assigner: WindowAssigner[_ >: T, W]): AllWindowedStream[T, W] = {
    new AllWindowedStream[T, W](new JavaAllWindowedStream[T, W](javaStream, assigner))
  }

  /**
   * Extracts a timestamp from an element and assigns it as the internal timestamp of that
   * element. The internal timestamps are, for example, used for event-time window operations.
   *
   * If you know that the timestamps are strictly increasing you can use an
   * [[org.apache.flink.streaming.api.functions.AscendingTimestampExtractor]]. Otherwise,
   * you should provide a [[TimestampExtractor]] that also implements
   * [[TimestampExtractor#getCurrentWatermark]] to keep track of watermarks.
   *
   * @see org.apache.flink.streaming.api.watermark.Watermark
   */
  def assignTimestamps(extractor: TimestampExtractor[T]): DataStream[T] = {
    javaStream.assignTimestamps(clean(extractor))
  }

  /**
   * Extracts a timestamp from an element and assigns it as the internal timestamp of that
   * element. The internal timestamps are, for example, used for event-time window operations.
   *
   * If you know that the timestamps are strictly increasing you can use an
   * [[org.apache.flink.streaming.api.functions.AscendingTimestampExtractor]]. Otherwise,
   * you should provide a [[TimestampExtractor]] that also implements
   * [[TimestampExtractor#getCurrentWatermark]] to keep track of watermarks.
   *
   * @see org.apache.flink.streaming.api.watermark.Watermark
   */
  def assignAscendingTimestamps(extractor: T => Long): DataStream[T] = {
    val cleanExtractor = clean(extractor)
    val extractorFunction = new AscendingTimestampExtractor[T] {
      def extractAscendingTimestamp(element: T, currentTimestamp: Long): Long = {
        cleanExtractor(element)
      }
    }
    javaStream.assignTimestamps(extractorFunction)
  }

  /**
   * Operator used for directing tuples to specific named outputs using an
   * OutputSelector. Calling this method on an operator creates a new
   * [[SplitStream]].
   */
  def split(selector: OutputSelector[T]): SplitStream[T] = javaStream.split(selector)

  /**
   * Creates a new [[SplitStream]] that contains only the elements satisfying the
   * given output selector predicate.
   */
  def split(fun: T => TraversableOnce[String]): SplitStream[T] = {
    if (fun == null) {
      throw new NullPointerException("OutputSelector must not be null.")
    }
    val cleanFun = clean(fun)
    val selector = new OutputSelector[T] {
      def select(in: T): java.lang.Iterable[String] = {
        cleanFun(in).toIterable.asJava
      }
    }
    split(selector)
  }

  /**
   * Creates a co-group operation. See [[CoGroupedStreams]] for an example of how the keys
   * and window can be specified.
   */
  def coGroup[T2](otherStream: DataStream[T2]): CoGroupedStreams.Unspecified[T, T2] = {
    CoGroupedStreams.createCoGroup(this, otherStream)
  }

  /**
   * Creates a join operation. See [[JoinedStreams]] for an example of how the keys
   * and window can be specified.
   */
  def join[T2](otherStream: DataStream[T2]): JoinedStreams.Unspecified[T, T2] = {
    JoinedStreams.createJoin(this, otherStream)
  }

  /**
   * Writes a DataStream to the standard output stream (stdout). For each
   * element of the DataStream the result of .toString is written.
   */
  def print(): DataStreamSink[T] = javaStream.print()

  /**
   * Writes a DataStream to the standard error stream (stderr).
   *
   * For each element of the DataStream the result of
   * [[AnyRef.toString()]] is written.
   *
   * @return The closed DataStream.
   */
  def printToErr() = javaStream.printToErr()
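  // Illustrative sketch (not part of the original source): splitting a stream into named
  // outputs and selecting one of them. `readings` is a hypothetical DataStream[Int].
  //
  //   val splitStream: SplitStream[Int] =
  //     readings.split(value => if (value % 2 == 0) Seq("even") else Seq("odd"))
  //   val evens: DataStream[Int] = splitStream.select("even")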
  /**
   * Writes a DataStream to the file specified by path in text format. The
   * writing is performed periodically, every millis milliseconds. For
   * every element of the DataStream the result of .toString is written.
   */
  def writeAsText(path: String, millis: Long = 0): DataStreamSink[T] =
    javaStream.writeAsText(path, millis)

  /**
   * Writes a DataStream to the file specified by path in CSV format. The
   * writing is performed periodically, every millis milliseconds. For
   * every element of the DataStream the result of .toString is written.
   */
  def writeAsCsv(
      path: String,
      millis: Long = 0,
      rowDelimiter: String = ScalaCsvOutputFormat.DEFAULT_LINE_DELIMITER,
      fieldDelimiter: String = ScalaCsvOutputFormat.DEFAULT_FIELD_DELIMITER,
      writeMode: FileSystem.WriteMode = null): DataStreamSink[T] = {
    require(javaStream.getType.isTupleType, "CSV output can only be used with Tuple DataStreams.")
    val of = new ScalaCsvOutputFormat[Product](new Path(path), rowDelimiter, fieldDelimiter)
    if (writeMode != null) {
      of.setWriteMode(writeMode)
    }
    javaStream.write(of.asInstanceOf[OutputFormat[T]], millis)
  }

  /**
   * Writes a DataStream using the given [[OutputFormat]]. The
   * writing is performed periodically, every millis milliseconds.
   */
  def write(format: OutputFormat[T], millis: Long): DataStreamSink[T] = {
    javaStream.write(format, millis)
  }

  /**
   * Writes the DataStream to a socket as a byte array. The format of the output is
   * specified by a [[SerializationSchema]].
   */
  def writeToSocket(
      hostname: String,
      port: Integer,
      schema: SerializationSchema[T, Array[Byte]]): DataStreamSink[T] = {
    javaStream.writeToSocket(hostname, port, schema)
  }

  /**
   * Adds the given sink to this DataStream. Only streams with sinks added
   * will be executed once the StreamExecutionEnvironment.execute(...)
   * method is called.
   */
  def addSink(sinkFunction: SinkFunction[T]): DataStreamSink[T] =
    javaStream.addSink(sinkFunction)

  /**
   * Adds the given sink to this DataStream. Only streams with sinks added
   * will be executed once the StreamExecutionEnvironment.execute(...)
   * method is called.
   */
  def addSink(fun: T => Unit): DataStreamSink[T] = {
    if (fun == null) {
      throw new NullPointerException("Sink function must not be null.")
    }
    val cleanFun = clean(fun)
    val sinkFunction = new SinkFunction[T] {
      def invoke(in: T) = cleanFun(in)
    }
    this.addSink(sinkFunction)
  }

  /**
   * Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning
   * is not disabled in the [[org.apache.flink.api.common.ExecutionConfig]].
   */
  private[flink] def clean[F <: AnyRef](f: F): F = {
    new StreamExecutionEnvironment(javaStream.getExecutionEnvironment).scalaClean(f)
  }
}
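// Illustrative end-to-end sketch (not part of the original source) of how the DataStream API
// above is typically used. All identifiers except the Flink classes are hypothetical;
// `socketTextStream` and `execute` belong to StreamExecutionEnvironment rather than this class.
//
//   val env = StreamExecutionEnvironment.getExecutionEnvironment
//   val counts = env
//     .socketTextStream("localhost", 9999)
//     .flatMap(_.toLowerCase.split("\\W+").filter(_.nonEmpty))
//     .map(word => (word, 1))
//     .keyBy(0)
//     .sum(1)
//   counts.print()
//   env.execute("Streaming WordCount")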




