
org.apache.spark.sql.internal.ToScalaUDF.scala
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.sql.internal

import scala.jdk.CollectionConverters._

import org.apache.spark.api.java.function.{CoGroupFunction, FilterFunction, FlatMapFunction, FlatMapGroupsFunction, FlatMapGroupsWithStateFunction, ForeachFunction, ForeachPartitionFunction, MapFunction, MapGroupsFunction, MapGroupsWithStateFunction, MapPartitionsFunction, ReduceFunction}
import org.apache.spark.sql.api.java._
import org.apache.spark.sql.streaming.GroupState

/**
 * Helper object that provides conversions from the org.apache.spark.sql.api.java.UDF* and
 * org.apache.spark.api.java.function.* interfaces to Scala functions.
 *
 * Please note that this object is used by Spark Connect Scala UDFs, so we need to be careful
 * with any modifications to it; otherwise we will break backwards compatibility. Concretely,
 * this means you can only add methods to this object. You cannot rename it, move it, change
 * its `serialVersionUID`, remove methods, change method signatures, or change method
 * semantics.
 */
@SerialVersionUID(2019907615267866045L)
private[sql] object ToScalaUDF extends Serializable {
  def apply[T](f: FilterFunction[T]): T => Boolean = f.call

  def apply[T](f: ReduceFunction[T]): (T, T) => T = f.call

  def apply[V, W](f: MapFunction[V, W]): V => W = f.call

  def apply[K, V, U](f: MapGroupsFunction[K, V, U]): (K, Iterator[V]) => U =
    (key, values) => f.call(key, values.asJava)

  def apply[K, V, S, U](
      f: MapGroupsWithStateFunction[K, V, S, U]): (K, Iterator[V], GroupState[S]) => U =
    (key, values, state) => f.call(key, values.asJava, state)

  def apply[V, U](f: MapPartitionsFunction[V, U]): Iterator[V] => Iterator[U] =
    values => f.call(values.asJava).asScala

  def apply[K, V, U](f: FlatMapGroupsFunction[K, V, U]): (K, Iterator[V]) => Iterator[U] =
    (key, values) => f.call(key, values.asJava).asScala

  def apply[K, V, S, U](f: FlatMapGroupsWithStateFunction[K, V, S, U])
      : (K, Iterator[V], GroupState[S]) => Iterator[U] =
    (key, values, state) => f.call(key, values.asJava, state).asScala

  def apply[K, V, U, R](
      f: CoGroupFunction[K, V, U, R]): (K, Iterator[V], Iterator[U]) => Iterator[R] =
    (key, left, right) => f.call(key, left.asJava, right.asJava).asScala

  def apply[V](f: ForeachFunction[V]): V => Unit = f.call

  def apply[V](f: ForeachPartitionFunction[V]): Iterator[V] => Unit =
    values => f.call(values.asJava)
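
  /* Illustrative usage (a sketch, not part of the object's API surface): the overloads above let
     the engine accept Java functional interfaces where it internally works with Scala functions.
     The MapFunction instance below is a hypothetical example value.

     val javaLength = new MapFunction[String, Int] {
       override def call(v: String): Int = v.length
     }
     val scalaLength: String => Int = ToScalaUDF(javaLength)
     assert(scalaLength("spark") == 5)
   */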

  // scalastyle:off line.size.limit

  /* register 0-22 were generated by this script

    (0 to 22).foreach { i =>
      val extTypeArgs = (0 to i).map(_ => "_").mkString(", ")
      val anyTypeArgs = (0 to i).map(_ => "Any").mkString(", ")
      val anyCast = s".asInstanceOf[UDF$i[$anyTypeArgs]]"
      val anyParams = (1 to i).map(_ => "_: Any").mkString(", ")
      val funcCall = if (i == 0) s"() => f$anyCast.call($anyParams)" else s"f$anyCast.call($anyParams)"
      println(s"""
        |/**
        | * Create a scala.Function$i wrapper for a org.apache.spark.sql.api.java.UDF$i instance.
        | */
        |def apply(f: UDF$i[$extTypeArgs]): Function$i[$anyTypeArgs] = {
        |  $funcCall
        |}""".stripMargin)
    }
   */

  /**
   * Create a scala.Function0 wrapper for a org.apache.spark.sql.api.java.UDF0 instance.
   */
  def apply(f: UDF0[_]): () => Any = { () =>
    f.asInstanceOf[UDF0[Any]].call()
  }

  /**
   * Create a scala.Function1 wrapper for a org.apache.spark.sql.api.java.UDF1 instance.
   */
  def apply(f: UDF1[_, _]): (Any) => Any = {
    f.asInstanceOf[UDF1[Any, Any]].call(_: Any)
  }
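
  /* Illustrative sketch (hypothetical values, not generated code): wrapping a UDF1 erases the
     argument and result types to Any, which is the shape the UDF execution path expects.

     val upper = new UDF1[String, String] {
       override def call(s: String): String = s.toUpperCase
     }
     val wrapped: Any => Any = ToScalaUDF(upper)
     assert(wrapped("spark") == "SPARK")
   */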

  /**
   * Create a scala.Function2 wrapper for a org.apache.spark.sql.api.java.UDF2 instance.
   */
  def apply(f: UDF2[_, _, _]): (Any, Any) => Any = {
    f.asInstanceOf[UDF2[Any, Any, Any]].call(_: Any, _: Any)
  }

  /**
   * Create a scala.Function3 wrapper for a org.apache.spark.sql.api.java.UDF3 instance.
   */
  def apply(f: UDF3[_, _, _, _]): (Any, Any, Any) => Any = {
    f.asInstanceOf[UDF3[Any, Any, Any, Any]].call(_: Any, _: Any, _: Any)
  }

  /**
   * Create a scala.Function4 wrapper for a org.apache.spark.sql.api.java.UDF4 instance.
   */
  def apply(f: UDF4[_, _, _, _, _]): (Any, Any, Any, Any) => Any = {
    f.asInstanceOf[UDF4[Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any)
  }

  /**
   * Create a scala.Function5 wrapper for a org.apache.spark.sql.api.java.UDF5 instance.
   */
  def apply(f: UDF5[_, _, _, _, _, _]): (Any, Any, Any, Any, Any) => Any = {
    f.asInstanceOf[UDF5[Any, Any, Any, Any, Any, Any]]
      .call(_: Any, _: Any, _: Any, _: Any, _: Any)
  }

  /**
   * Create a scala.Function6 wrapper for a org.apache.spark.sql.api.java.UDF6 instance.
   */
  def apply(f: UDF6[_, _, _, _, _, _, _]): (Any, Any, Any, Any, Any, Any) => Any = {
    f.asInstanceOf[UDF6[Any, Any, Any, Any, Any, Any, Any]]
      .call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any)
  }

  /**
   * Create a scala.Function7 wrapper for a org.apache.spark.sql.api.java.UDF7 instance.
   */
  def apply(f: UDF7[_, _, _, _, _, _, _, _]): (Any, Any, Any, Any, Any, Any, Any) => Any = {
    f.asInstanceOf[UDF7[Any, Any, Any, Any, Any, Any, Any, Any]]
      .call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any)
  }

  /**
   * Create a scala.Function8 wrapper for a org.apache.spark.sql.api.java.UDF8 instance.
   */
  def apply(
      f: UDF8[_, _, _, _, _, _, _, _, _]): (Any, Any, Any, Any, Any, Any, Any, Any) => Any = {
    f.asInstanceOf[UDF8[Any, Any, Any, Any, Any, Any, Any, Any, Any]]
      .call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any)
  }

  /**
   * Create a scala.Function9 wrapper for a org.apache.spark.sql.api.java.UDF9 instance.
   */
  def apply(f: UDF9[_, _, _, _, _, _, _, _, _, _])
      : (Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any = {
    f.asInstanceOf[UDF9[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]]
      .call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any)
  }

  /**
   * Create a scala.Function10 wrapper for a org.apache.spark.sql.api.java.UDF10 instance.
   */
  def apply(f: UDF10[_, _, _, _, _, _, _, _, _, _, _])
      : Function10[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any] = {
    f.asInstanceOf[UDF10[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]]
      .call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any)
  }

  /**
   * Create a scala.Function11 wrapper for a org.apache.spark.sql.api.java.UDF11 instance.
   */
  def apply(f: UDF11[_, _, _, _, _, _, _, _, _, _, _, _])
      : Function11[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any] = {
    f.asInstanceOf[UDF11[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]]
      .call(
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any)
  }

  /**
   * Create a scala.Function12 wrapper for a org.apache.spark.sql.api.java.UDF12 instance.
   */
  def apply(f: UDF12[_, _, _, _, _, _, _, _, _, _, _, _, _])
      : Function12[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any] = {
    f.asInstanceOf[UDF12[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]]
      .call(
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any)
  }

  /**
   * Create a scala.Function13 wrapper for a org.apache.spark.sql.api.java.UDF13 instance.
   */
  def apply(f: UDF13[_, _, _, _, _, _, _, _, _, _, _, _, _, _])
      : Function13[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any] = {
    f.asInstanceOf[UDF13[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]]
      .call(
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any)
  }

  /**
   * Create a scala.Function14 wrapper for a org.apache.spark.sql.api.java.UDF14 instance.
   */
  def apply(f: UDF14[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _])
      : Function14[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any] = {
    f.asInstanceOf[
      UDF14[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]]
      .call(
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any)
  }

  /**
   * Create a scala.Function15 wrapper for a org.apache.spark.sql.api.java.UDF15 instance.
   */
  def apply(f: UDF15[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _]): Function15[
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any] = {
    f.asInstanceOf[
      UDF15[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]]
      .call(
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any)
  }

  /**
   * Create a scala.Function16 wrapper for a org.apache.spark.sql.api.java.UDF16 instance.
   */
  def apply(f: UDF16[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _]): Function16[
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any] = {
    f.asInstanceOf[
      UDF16[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]]
      .call(
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any)
  }

  /**
   * Create a scala.Function17 wrapper for a org.apache.spark.sql.api.java.UDF17 instance.
   */
  def apply(f: UDF17[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _]): Function17[
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any] = {
    f.asInstanceOf[UDF17[
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any]]
      .call(
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any)
  }

  /**
   * Create a scala.Function18 wrapper for a org.apache.spark.sql.api.java.UDF18 instance.
   */
  def apply(f: UDF18[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _]): Function18[
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any] = {
    f.asInstanceOf[UDF18[
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any]]
      .call(
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any)
  }

  /**
   * Create a scala.Function19 wrapper for a org.apache.spark.sql.api.java.UDF19 instance.
   */
  def apply(f: UDF19[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _]): Function19[
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any] = {
    f.asInstanceOf[UDF19[
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any]]
      .call(
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any)
  }

  /**
   * Create a scala.Function20 wrapper for a org.apache.spark.sql.api.java.UDF20 instance.
   */
  def apply(f: UDF20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _]): Function20[
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any] = {
    f.asInstanceOf[UDF20[
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any]]
      .call(
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any)
  }

  /**
   * Create a scala.Function21 wrapper for a org.apache.spark.sql.api.java.UDF21 instance.
   */
  def apply(
      f: UDF21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _]): Function21[
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any] = {
    f.asInstanceOf[UDF21[
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any]]
      .call(
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any)
  }

  /**
   * Create a scala.Function22 wrapper for a org.apache.spark.sql.api.java.UDF22 instance.
   */
  def apply(
      f: UDF22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _]): Function22[
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any,
    Any] = {
    f.asInstanceOf[UDF22[
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any,
      Any]]
      .call(
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any,
        _: Any)
  }
  // scalastyle:on line.size.limit
}

/**
 * Adaptors from one UDF shape to another, for example adapting a foreach function for use in
 * foreachPartition.
 *
 * Please note that this object is used by Spark Connect Scala UDFs, so we need to be careful
 * with any modifications to it; otherwise we will break backwards compatibility. Concretely,
 * this means you can only add methods to this object. You cannot rename it, move it, change
 * its `serialVersionUID`, remove methods, change method signatures, or change method
 * semantics.
 */
@SerialVersionUID(0L) // TODO
object UDFAdaptors extends Serializable {
  def flatMapToMapPartitions[V, U](f: V => IterableOnce[U]): Iterator[V] => Iterator[U] =
    values => values.flatMap(f)

  def flatMapToMapPartitions[V, U](f: FlatMapFunction[V, U]): Iterator[V] => Iterator[U] =
    values => values.flatMap(v => f.call(v).asScala)

  def mapToMapPartitions[V, U](f: V => U): Iterator[V] => Iterator[U] = values => values.map(f)

  def mapToMapPartitions[V, U](f: MapFunction[V, U]): Iterator[V] => Iterator[U] =
    values => values.map(f.call)
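
  /* Illustrative sketch (hypothetical values): adapting a per-element function to the
     per-partition shape used when map-style operators are lowered onto partition-based plans.

     val perElement: Int => Int = _ + 1
     val perPartition: Iterator[Int] => Iterator[Int] = UDFAdaptors.mapToMapPartitions(perElement)
     assert(perPartition(Iterator(1, 2, 3)).toList == List(2, 3, 4))
   */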

  def foreachToForeachPartition[T](f: T => Unit): Iterator[T] => Unit =
    values => values.foreach(f)

  def foreachToForeachPartition[T](f: ForeachFunction[T]): Iterator[T] => Unit =
    values => values.foreach(f.call)

  def foreachPartitionToMapPartitions[V, U](f: Iterator[V] => Unit): Iterator[V] => Iterator[U] =
    values => {
      f(values)
      Iterator.empty[U]
    }

  def iterableOnceToSeq[A, B](f: A => IterableOnce[B]): A => Seq[B] =
    value => f(value).iterator.toSeq

  def mapGroupsToFlatMapGroups[K, V, U](
      f: (K, Iterator[V]) => U): (K, Iterator[V]) => Iterator[U] =
    (key, values) => Iterator.single(f(key, values))
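
  /* Illustrative sketch (hypothetical values): a mapGroups-style function is lifted to the
     flatMapGroups shape by wrapping its single result in a one-element iterator.

     val sumGroup: (String, Iterator[Int]) => Int = (_, vs) => vs.sum
     val lifted = UDFAdaptors.mapGroupsToFlatMapGroups(sumGroup)
     assert(lifted("k", Iterator(1, 2, 3)).toList == List(6))
   */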

  def mapGroupsWithStateToFlatMapWithState[K, V, S, U](
      f: (K, Iterator[V], GroupState[S]) => U): (K, Iterator[V], GroupState[S]) => Iterator[U] =
    (key: K, values: Iterator[V], state: GroupState[S]) => Iterator(f(key, values, state))

  def coGroupWithMappedValues[K, V, U, R, IV, IU](
      f: (K, Iterator[V], Iterator[U]) => IterableOnce[R],
      leftValueMapFunc: Option[IV => V],
      rightValueMapFunc: Option[IU => U]): (K, Iterator[IV], Iterator[IU]) => IterableOnce[R] = {
    (leftValueMapFunc, rightValueMapFunc) match {
      case (None, None) =>
        f.asInstanceOf[(K, Iterator[IV], Iterator[IU]) => IterableOnce[R]]
      case (Some(mapLeft), None) =>
        (k, left, right) => f(k, left.map(mapLeft), right.asInstanceOf[Iterator[U]])
      case (None, Some(mapRight)) =>
        (k, left, right) => f(k, left.asInstanceOf[Iterator[V]], right.map(mapRight))
      case (Some(mapLeft), Some(mapRight)) =>
        (k, left, right) => f(k, left.map(mapLeft), right.map(mapRight))
    }
  }
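
  /* Illustrative sketch (hypothetical values): when a value map function is supplied for a side,
     the incoming iterators are mapped lazily before the user's cogroup function sees them; when
     both sides are None, the user's function is reused unchanged via a cast.

     val cogroup: (Int, Iterator[Int], Iterator[Int]) => IterableOnce[Int] = (_, l, r) => l ++ r
     val adapted = UDFAdaptors.coGroupWithMappedValues[Int, Int, Int, Int, String, String](
       cogroup, Some(_.length), Some(_.length))
     assert(adapted(0, Iterator("a"), Iterator("bb")).iterator.toList == List(1, 2))
   */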

  def flatMapGroupsWithMappedValues[K, IV, V, R](
      f: (K, Iterator[V]) => IterableOnce[R],
      valueMapFunc: Option[IV => V]): (K, Iterator[IV]) => IterableOnce[R] = valueMapFunc match {
    case Some(mapValue) => (k, values) => f(k, values.map(mapValue))
    case None => f.asInstanceOf[(K, Iterator[IV]) => IterableOnce[R]]
  }

  def flatMapGroupsWithStateWithMappedValues[K, IV, V, S, U](
      f: (K, Iterator[V], GroupState[S]) => Iterator[U],
      valueMapFunc: Option[IV => V]): (K, Iterator[IV], GroupState[S]) => Iterator[U] = {
    valueMapFunc match {
      case Some(mapValue) => (k, values, state) => f(k, values.map(mapValue), state)
      case None => f.asInstanceOf[(K, Iterator[IV], GroupState[S]) => Iterator[U]]
    }
  }
}



