All downloads are free. The search and download functionality uses the official Maven repository.

tech.ytsaurus.spyt.SparkAdapter330.scala Maven / Gradle / Ivy

There is a newer version: 2.3.0
Show newest version
package tech.ytsaurus.spyt

import org.antlr.v4.runtime.ParserRuleContext
import org.apache.log4j.spi.LoggingEvent
import org.apache.log4j.{Category, Level}
import org.apache.spark.executor.{ExecutorBackendFactory, ExecutorBackendFactory330}
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.connector.read.{InputPartition, ScanBuilder}
import org.apache.spark.sql.connector.read.partitioning.{Partitioning, UnknownPartitioning}
import org.apache.spark.sql.execution.datasources.v2.{DataSourceRDDPartition, FileScanBuilder, PushDownUtils}
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.v2.{ScanBuilderAdapter, YtScanBuilder330}
import org.apache.spark.sql.Utils330
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.parser.ParserUtils
import org.apache.spark.sql.execution.datasources.PartitionedFile
import tech.ytsaurus.spyt.format.YtPartitioningSupport.YtPartitionedFileBase
import tech.ytsaurus.spyt.format.{YtPartitionedFile330, YtPartitioningDelegate}

/**
 * [[SparkAdapter]] implementation that declares "3.3.0" as its minimum Spark version.
 *
 * Every member is a thin delegation to a helper imported by this file
 * (`YtScanBuilder330`, `ExecutorBackendFactory330`, `YtPartitionedFile330`, `Utils330`)
 * or to the corresponding Spark class.
 */
class SparkAdapter330 extends SparkAdapter {

  /** Minimum Spark version declared by this adapter. */
  override def minSparkVersion: String = "3.3.0"

  /**
   * Builds the output partitioning reported for a YT scan.
   *
   * @param nPartitions number of partitions passed to [[UnknownPartitioning]]
   * @return a new [[UnknownPartitioning]] for `nPartitions`
   */
  override def createYtScanOutputPartitioning(nPartitions: Int): Partitioning =
    // TODO consider using org.apache.spark.sql.connector.read.partitioning.KeyGroupedPartitioning
    new UnknownPartitioning(nPartitions)

  /** Wraps the given adapter into a [[YtScanBuilder330]]. */
  override def createYtScanBuilder(scanBuilderAdapter: ScanBuilderAdapter): FileScanBuilder =
    new YtScanBuilder330(scanBuilderAdapter)

  /**
   * Pushes `filters` into `scanBuilder` and pairs what was pushed with `expected`.
   *
   * `PushDownUtils.pushFilters` yields either source filters (`Left`) or predicates (`Right`)
   * as the first element of its result. In the `Right` case `expected` is converted with
   * `Utils330.filterToPredicate` so that both halves of the returned pair are of the same kind.
   *
   * @param scanBuilder builder that receives the pushed filters
   * @param filters     catalyst expressions to push down
   * @param expected    filters the caller expects to have been pushed
   * @return `(pushed, expected)`, with `expected` converted to predicates when predicates were pushed
   */
  override def checkPushedFilters(
      scanBuilder: ScanBuilder,
      filters: Seq[Expression],
      expected: Seq[Filter]
  ): (Seq[Any], Seq[Any]) =
    PushDownUtils.pushFilters(scanBuilder, filters)._1 match {
      case Left(pushedFilters) =>
        (pushedFilters, expected)
      case Right(pushedPredicates) =>
        (pushedPredicates, expected.map(Utils330.filterToPredicate))
    }

  /** Executor backend factory used by this adapter: [[ExecutorBackendFactory330]]. */
  override def executorBackendFactory: ExecutorBackendFactory = ExecutorBackendFactory330

  /**
   * Creates a [[PartitionedFile]] from the four given values, passed positionally.
   *
   * @param partitionValues partition values row
   * @param filePath        path of the file
   * @param start           start offset
   * @param length          length of the file slice
   */
  override def createPartitionedFile(
      partitionValues: InternalRow,
      filePath: String,
      start: Long,
      length: Long
  ): PartitionedFile =
    PartitionedFile(partitionValues, filePath, start, length)

  /** Wraps `delegate` into a [[YtPartitionedFile330]]. */
  override def createYtPartitionedFile[T <: YtPartitioningDelegate](delegate: T): YtPartitionedFileBase[T] =
    new YtPartitionedFile330(delegate)

  /** Evaluates `f` through `ParserUtils.withOrigin` for the given parser context. */
  override def parserUtilsWithOrigin[T](ctx: ParserRuleContext)(f: => T): T =
    ParserUtils.withOrigin(ctx)(f)

  /** Returns the first element of `dsrddPartition.inputPartitions`. */
  override def getInputPartition(dsrddPartition: DataSourceRDDPartition): InputPartition =
    dsrddPartition.inputPartitions.head

  /** Builds a log4j [[LoggingEvent]] by forwarding all arguments to `Utils330.createLoggingEvent`. */
  override def createLoggingEvent(
      fqnOfCategoryClass: String,
      logger: Category,
      timeStamp: Long,
      level: Level,
      message: String,
      throwable: Throwable
  ): LoggingEvent =
    Utils330.createLoggingEvent(fqnOfCategoryClass, logger, timeStamp, level, message, throwable)
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy