All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.paimon.spark.PaimonBaseScanBuilder.scala Maven / Gradle / Ivy

There is a newer version: 0.9.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.paimon.spark

import org.apache.paimon.predicate.{PartitionPredicateVisitor, Predicate, PredicateBuilder}
import org.apache.paimon.table.Table
import org.apache.paimon.table.source.ReadBuilder

import org.apache.spark.internal.Logging
import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownRequiredColumns}
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types.StructType

import scala.collection.mutable

/**
 * Base Spark [[ScanBuilder]] for a Paimon [[Table]].
 *
 * It records the filters and the column projection that Spark pushes down and applies them to
 * the [[ReadBuilder]] obtained from the table when the scan is built.
 *
 * @param table
 *   the Paimon table to scan
 */
abstract class PaimonBaseScanBuilder(table: Table)
  extends ScanBuilder
  with SupportsPushDownFilters
  with SupportsPushDownRequiredColumns
  with Logging {

  /** Conjunction of the predicates converted by the latest `pushFilters` call, if any. */
  protected var predicates: Option[Predicate] = None

  /** Spark filters accepted by the latest `pushFilters` call; `None` until it is called. */
  protected var pushed: Option[Array[Filter]] = None

  /** Positions, in the table row type, of the columns selected by `pruneColumns`, if called. */
  protected var projectedIndexes: Option[Array[Int]] = None

  /**
   * Creates a new read builder from the table and applies the recorded projection and filter,
   * each only when it has been set.
   */
  protected def getReadBuilder(): ReadBuilder = {
    val readBuilder = table.newReadBuilder()
    projectedIndexes.foreach(indexes => readBuilder.withProjection(indexes))
    predicates.foreach(predicate => readBuilder.withFilter(predicate))
    readBuilder
  }

  /** Builds a [[PaimonScan]] over the table using the configured read builder. */
  override def build(): Scan = new PaimonScan(table, getReadBuilder())

  /**
   * Pushes down filters, and returns filters that need to be evaluated after scanning.
   *
   * Rows should be returned from the data source if and only if all of the filters match. That
   * is, filters must be interpreted as ANDed together.
   *
   * Every filter that `SparkFilterConverter` can convert is recorded as pushed, and the converted
   * predicates are ANDed into `predicates`. A converted filter is still returned for post-scan
   * evaluation unless visiting its predicate with the [[PartitionPredicateVisitor]] yields
   * `true` (presumably: the predicate only references partition keys -- confirm against the
   * visitor). Filters whose conversion throws `UnsupportedOperationException` are logged and
   * returned for post-scan evaluation.
   *
   * @param filters
   *   the filters Spark offers for push down
   * @return
   *   the filters Spark must evaluate again after the scan
   */
  override def pushFilters(filters: Array[Filter]): Array[Filter] = {
    val pushable = mutable.ArrayBuffer.empty[Filter]
    val postScan = mutable.ArrayBuffer.empty[Filter]
    val converted = mutable.ArrayBuffer.empty[Predicate]

    val converter = new SparkFilterConverter(table.rowType)
    val visitor = new PartitionPredicateVisitor(table.partitionKeys())

    filters.foreach {
      filter =>
        try {
          val predicate = converter.convert(filter)
          pushable.append(filter)
          converted.append(predicate)
          if (!predicate.visit(visitor)) {
            postScan.append(filter)
          }
        } catch {
          case e: UnsupportedOperationException =>
            logWarning(e.getMessage)
            postScan.append(filter)
        }
    }

    // Derive both pieces of state from this call only, so that `predicates` can never describe
    // filters that are no longer reported by `pushedFilters`.
    this.predicates = if (converted.nonEmpty) Some(PredicateBuilder.and(converted: _*)) else None
    this.pushed = Some(pushable.toArray)
    postScan.toArray
  }

  /**
   * Returns the filters that are pushed to the data source via `pushFilters`.
   *
   * There are 3 kinds of filters:
   *
   *   1. pushable filters which don't need to be evaluated again after scanning.
   *   1. pushable filters which still need to be evaluated after scanning, e.g. parquet row
   *      group filter.
   *   1. non-pushable filters.
   *
   * Both case 1 and 2 should be considered as pushed filters and should be returned by this
   * method.
   *
   * It's possible that there are no filters in the query and `pushFilters` is never called; an
   * empty array is returned in that case.
   */
  override def pushedFilters(): Array[Filter] = pushed.getOrElse(Array.empty)

  /**
   * Records the projection for the columns Spark requires, as positions in the table row type.
   *
   * @param requiredSchema
   *   the columns Spark needs from this scan
   * @throws IllegalArgumentException
   *   if a required column is not a field of the table row type
   */
  override def pruneColumns(requiredSchema: StructType): Unit = {
    val tableFieldNames = table.rowType.getFieldNames
    val requiredNames = requiredSchema.fieldNames
    val indexes = requiredNames.map(name => tableFieldNames.indexOf(name))

    // `indexOf` yields -1 for an unknown name; reject it here instead of storing an invalid
    // projection index that would later be passed to the read builder.
    val unknown = requiredNames.zip(indexes).collect { case (name, index) if index < 0 => name }
    require(
      unknown.isEmpty,
      s"Cannot prune columns: ${unknown.mkString(", ")} not found in table fields $tableFieldNames")

    this.projectedIndexes = Some(indexes)
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy