org.apache.hudi.ExpressionIndexSupport.scala

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hudi

import org.apache.hudi.ColumnStatsIndexSupport.{composeColumnStatStructType, deserialize, tryUnpackValueWrapper}
import org.apache.hudi.ExpressionIndexSupport._
import org.apache.hudi.HoodieCatalystUtils.{withPersistedData, withPersistedDataset}
import org.apache.hudi.HoodieConversionUtils.toScalaOption
import org.apache.hudi.HoodieSparkExpressionIndex.SPARK_FUNCTION_MAP
import org.apache.hudi.RecordLevelIndexSupport.{fetchQueryWithAttribute, filterQueryWithRecordKey}
import org.apache.hudi.avro.model.{HoodieMetadataColumnStats, HoodieMetadataRecord}
import org.apache.hudi.common.config.HoodieMetadataConfig
import org.apache.hudi.common.data.HoodieData
import org.apache.hudi.common.model.{FileSlice, HoodieIndexDefinition, HoodieRecord}
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.common.util.ValidationUtils.checkState
import org.apache.hudi.common.util.collection
import org.apache.hudi.common.util.hash.{ColumnIndexID, PartitionIndexID}
import org.apache.hudi.data.HoodieJavaRDD
import org.apache.hudi.index.functional.HoodieExpressionIndex.SPARK_FROM_UNIXTIME
import org.apache.hudi.metadata.{HoodieMetadataPayload, HoodieTableMetadataUtil, MetadataPartitionType}
import org.apache.hudi.util.JFunction
import org.apache.spark.sql.HoodieUnsafeUtils.{createDataFrameFromInternalRows, createDataFrameFromRDD, createDataFrameFromRows}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{EqualTo, Expression, FromUnixTime, In, Literal, UnaryExpression}
import org.apache.spark.sql.catalyst.util.TimestampFormatter
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.storage.StorageLevel

import scala.collection.JavaConverters._
import scala.collection.immutable.TreeSet
import scala.collection.mutable.ListBuffer
import scala.collection.parallel.mutable.ParHashMap

class ExpressionIndexSupport(spark: SparkSession,
                             tableSchema: StructType,
                             metadataConfig: HoodieMetadataConfig,
                             metaClient: HoodieTableMetaClient)
  extends SparkBaseIndexSupport(spark, metadataConfig, metaClient) {

  @transient private lazy val cachedColumnStatsIndexViews: ParHashMap[Seq[String], DataFrame] = ParHashMap()


  // NOTE: Since [[metadataConfig]] is transient, this has to be eagerly persisted before it is passed on to the executor
  private val inMemoryProjectionThreshold = metadataConfig.getColumnStatsIndexInMemoryProjectionThreshold

  override def getIndexName: String = ExpressionIndexSupport.INDEX_NAME

  override def computeCandidateFileNames(fileIndex: HoodieFileIndex,
                                         queryFilters: Seq[Expression],
                                         queryReferencedColumns: Seq[String],
                                         prunedPartitionsAndFileSlices: Seq[(Option[BaseHoodieTableFileIndex.PartitionPath], Seq[FileSlice])],
                                         shouldPushDownFilesFilter: Boolean
                                        ): Option[Set[String]] = {
    lazy val expressionIndexPartitionOpt = getExpressionIndexPartitionAndLiterals(queryFilters)
    if (isIndexAvailable && queryFilters.nonEmpty && expressionIndexPartitionOpt.nonEmpty) {
      val (indexPartition, expressionIndexQuery, literals) = expressionIndexPartitionOpt.get
      val indexDefinition = metaClient.getIndexMetadata.get().getIndexDefinitions.get(indexPartition)
      if (indexDefinition.getIndexType.equals(HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS)) {
        val readInMemory = shouldReadInMemory(fileIndex, queryReferencedColumns, inMemoryProjectionThreshold)
        val (prunedPartitions, prunedFileNames) = getPrunedPartitionsAndFileNames(fileIndex, prunedPartitionsAndFileSlices)
        val expressionIndexRecords = loadExpressionIndexRecords(indexPartition, prunedPartitions, readInMemory)
        loadTransposed(queryReferencedColumns, readInMemory, expressionIndexRecords, expressionIndexQuery) {
          transposedColStatsDF => Some(getCandidateFiles(transposedColStatsDF, Seq(expressionIndexQuery), prunedFileNames, isExpressionIndex = true))
        }
      } else if (indexDefinition.getIndexType.equals(HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS)) {
        val prunedPartitionAndFileNames = getPrunedPartitionsAndFileNamesMap(prunedPartitionsAndFileSlices, includeLogFiles = true)
        Option.apply(getCandidateFilesForKeys(indexPartition, prunedPartitionAndFileNames, literals))
      } else {
        Option.empty
      }
    } else {
      Option.empty
    }
  }
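
  // Illustrative sketch (not part of the original source): for a query filter such as
  //   from_unixtime(ts, 'yyyy-MM-dd') = '2025-01-01'
  // getExpressionIndexPartitionAndLiterals tries to match an expression index whose definition has
  // indexFunction 'from_unixtime' and source field 'ts'. If the matched index partition is of the
  // column-stats type, pruning goes through loadTransposed / getCandidateFiles; if it is of the
  // bloom-filters type, the extracted literals are probed against per-file bloom filters via
  // getCandidateFilesForKeys. The column name and literal above are hypothetical.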

  /**
   * Loads a view of the Column Stats Index in a transposed format, where a single row coalesces every column's
   * statistics for a single file, returning it as a [[DataFrame]]
   *
   * Please check out the scala-doc of the [[transpose]] method explaining this view in more detail
   */
  def loadTransposed[T](targetColumns: Seq[String],
                        shouldReadInMemory: Boolean,
                        colStatRecords: HoodieData[HoodieMetadataColumnStats],
                        expressionIndexQuery: Expression) (block: DataFrame => T): T = {
    cachedColumnStatsIndexViews.get(targetColumns) match {
      case Some(cachedDF) =>
        block(cachedDF)
      case None =>
        val colStatsRecords: HoodieData[HoodieMetadataColumnStats] = colStatRecords
        withPersistedData(colStatsRecords, StorageLevel.MEMORY_ONLY) {
          val (transposedRows, indexSchema) = transpose(colStatsRecords, targetColumns, expressionIndexQuery)
          val df = if (shouldReadInMemory) {
            // NOTE: This will instantiate a [[Dataset]] backed by [[LocalRelation]] holding all of the rows
            //       of the transposed table in memory, facilitating execution of the subsequently chained operations
            //       on it locally (on the driver; all such operations are actually going to be performed by Spark's
            //       Optimizer)
            createDataFrameFromRows(spark, transposedRows.collectAsList().asScala.toSeq, indexSchema)
          } else {
            val rdd = HoodieJavaRDD.getJavaRDD(transposedRows)
            spark.createDataFrame(rdd, indexSchema)
          }

          val allowCaching: Boolean = false
          if (allowCaching) {
            cachedColumnStatsIndexViews.put(targetColumns, df)
            // NOTE: Instead of collecting the rows from the index and holding them in memory, we instead rely
            //       on Spark as a (potentially distributed) cache managing the data lifecycle, while we simply keep
            //       the reference to the persisted [[DataFrame]] instance
            df.persist(StorageLevel.MEMORY_ONLY)

            block(df)
          } else {
            withPersistedDataset(df) {
              block(df)
            }
          }
        }
    }
  }
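
  // Usage sketch (illustrative only, mirroring the call in computeCandidateFileNames above):
  //   loadTransposed(queryReferencedColumns, readInMemory, expressionIndexRecords, expressionIndexQuery) {
  //     transposedColStatsDF =>
  //       Some(getCandidateFiles(transposedColStatsDF, Seq(expressionIndexQuery), prunedFileNames, isExpressionIndex = true))
  //   }
  // The block receives the transposed column-stats view and is evaluated while the underlying data is
  // persisted; per-target-column caching of the view is currently disabled (allowCaching = false).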

  /**
   * Transposes and converts the raw table format of the Column Stats Index representation,
   * where each row/record corresponds to an individual (column, file) pair, into the table format
   * where each row corresponds to a single file, with statistics for individual columns collated
   * w/in such row:
   *
   * Metadata Table Column Stats Index format:
   *
   * 
   *  +---------------------------+------------+------------+------------+-------------+
   *  |        fileName           | columnName |  minValue  |  maxValue  |  num_nulls  |
   *  +---------------------------+------------+------------+------------+-------------+
   *  | one_base_file.parquet     |          A |          1 |         10 |           0 |
   *  | another_base_file.parquet |          A |        -10 |          0 |           5 |
   *  +---------------------------+------------+------------+------------+-------------+
   *
   * Returned table format
   *
   *  +---------------------------+------------+------------+-------------+
   *  |          file             | A_minValue | A_maxValue | A_nullCount |
   *  +---------------------------+------------+------------+-------------+
   *  | one_base_file.parquet     |          1 |         10 |           0 |
   *  | another_base_file.parquet |        -10 |          0 |           5 |
   *  +---------------------------+------------+------------+-------------+
   *
   * NOTE: Column Stats Index might potentially contain statistics for many columns (if not all), while
   *       query at hand might only be referencing a handful of those. As such, we collect all the
   *       column references from the filtering expressions, and only transpose records corresponding to the
   *       columns referenced in those
   *
   * @param colStatsRecords [[HoodieData[HoodieMetadataColumnStats]]] bearing raw Column Stats Index records
   * @param queryColumns target columns to be included into the final table
   * @return reshaped table according to the format outlined above
   */
  private def transpose(colStatsRecords: HoodieData[HoodieMetadataColumnStats], queryColumns: Seq[String], expressionIndexQuery: Expression): (HoodieData[Row], StructType) = {
    val tableSchemaFieldMap = tableSchema.fields.map(f => (f.name, f)).toMap
    // NOTE: We're sorting the columns to make sure final index schema matches layout
    //       of the transposed table
    val sortedTargetColumnsSet = TreeSet(queryColumns:_*)

    // NOTE: This is a trick to avoid pulling all of [[ColumnStatsIndexSupport]] object into the lambdas'
    //       closures below
    val indexedColumns = queryColumns.toSet

    // NOTE: It's crucial to maintain appropriate ordering of the columns
    //       matching table layout: hence, we cherry-pick individual columns
    //       instead of simply filtering in the ones we're interested in the schema
    val (indexSchema, targetIndexedColumns) = composeIndexSchema(sortedTargetColumnsSet.toSeq, indexedColumns, tableSchema, expressionIndexQuery)

    // Here we perform complex transformation which requires us to modify the layout of the rows
    // of the dataset, and therefore we rely on low-level RDD API to avoid incurring encoding/decoding
    // penalty of the [[Dataset]], since it's required to adhere to its schema at all times, while
    // RDDs are not
    val transposedRows: HoodieData[Row] = colStatsRecords
      //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
      .filter(JFunction.toJavaSerializableFunction(r => sortedTargetColumnsSet.contains(r.getColumnName)))
      .mapToPair(JFunction.toJavaSerializablePairFunction(r => {
        if (r.getMinValue == null && r.getMaxValue == null) {
          // Corresponding row could be null in either of the 2 cases
          //    - Column contains only null values (in that case both min/max have to be nulls)
          //    - This is a stubbed Column Stats record (used as a tombstone)
          collection.Pair.of(r.getFileName, r)
        } else {
          val minValueWrapper = r.getMinValue
          val maxValueWrapper = r.getMaxValue

          checkState(minValueWrapper != null && maxValueWrapper != null, "Invalid Column Stats record: either both min/max have to be null, or both have to be non-null")

          val colName = r.getColumnName
          val colType = tableSchemaFieldMap(colName).dataType

          val minValue = deserialize(tryUnpackValueWrapper(minValueWrapper), colType)
          val maxValue = deserialize(tryUnpackValueWrapper(maxValueWrapper), colType)

          // Update min-/max-value structs w/ unwrapped values in-place
          r.setMinValue(minValue)
          r.setMaxValue(maxValue)

          collection.Pair.of(r.getFileName, r)
        }
      }))
      .groupByKey()
      .map(JFunction.toJavaSerializableFunction(p => {
        val columnRecordsSeq: Seq[HoodieMetadataColumnStats] = p.getValue.asScala.toSeq
        val fileName: String = p.getKey
        val valueCount: Long = columnRecordsSeq.head.getValueCount

        // To properly align individual rows (corresponding to a file) w/in the transposed projection, we need
        // to align existing column-stats for individual file with the list of expected ones for the
        // whole transposed projection (a superset of all files)
        val columnRecordsMap = columnRecordsSeq.map(r => (r.getColumnName, r)).toMap
        val alignedColStatRecordsSeq = targetIndexedColumns.map(columnRecordsMap.get)

        val coalescedRowValuesSeq =
          alignedColStatRecordsSeq.foldLeft(ListBuffer[Any](fileName, valueCount)) {
            case (acc, opt) =>
              opt match {
                case Some(colStatRecord) =>
                  acc ++= Seq(colStatRecord.getMinValue, colStatRecord.getMaxValue, colStatRecord.getNullCount)
                case None =>
                  // NOTE: This could occur in either of the following cases:
                  //    1. When certain columns exist in the schema but are absent in some data files due to
                  //       schema evolution or other reasons, these columns will not be present in the column stats.
                  //       In this case, we fill in default values by setting the min, max and null-count to null
                  //       (this behavior is consistent with reading non-existent columns from Parquet).
                  //    2. When certain columns are present both in the schema and the data files,
                  //       but the column stats are absent for these columns due to their types not supporting indexing,
                  //       we also set these columns to default values.
                  //
                  // This approach prevents errors during data skipping and, because the filter includes an isNull check,
                  // these conditions will not affect the accurate return of files from data skipping.
                  acc ++= Seq(null, null, null)
              }
          }

        Row(coalescedRowValuesSeq.toSeq: _*)
      }))

    (transposedRows, indexSchema)
  }
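
  // Illustrative note (not from the original source): for a single indexed column A, each transposed
  // row produced above has the shape
  //   Row(fileName, valueCount, A_minValue, A_maxValue, A_nullCount)
  // matching the schema returned by composeIndexSchema below; files lacking stats for A carry
  // (null, null, null) in those slots.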

  def composeIndexSchema(targetColumnNames: Seq[String], indexedColumns: Set[String], tableSchema: StructType, expressionIndexQuery: Expression): (StructType, Seq[String]) = {
    val fileNameField = StructField(HoodieMetadataPayload.COLUMN_STATS_FIELD_FILE_NAME, StringType, nullable = true, Metadata.empty)
    val valueCountField = StructField(HoodieMetadataPayload.COLUMN_STATS_FIELD_VALUE_COUNT, LongType, nullable = true, Metadata.empty)

    val targetIndexedColumns = targetColumnNames.filter(indexedColumns.contains(_))
    val targetIndexedFields = targetIndexedColumns.map(colName => tableSchema.fields.find(f => f.name == colName).get)
    val dataType: DataType = expressionIndexQuery match {
      case eq: EqualTo => eq.right.asInstanceOf[Literal].dataType
      case in: In => in.list(0).asInstanceOf[Literal].dataType
      case _ => targetIndexedFields(0).dataType
    }

    (StructType(
      targetIndexedFields.foldLeft(Seq(fileNameField, valueCountField)) {
        case (acc, field) =>
          acc ++ Seq(
            composeColumnStatStructType(field.name, HoodieMetadataPayload.COLUMN_STATS_FIELD_MIN_VALUE, dataType),
            composeColumnStatStructType(field.name, HoodieMetadataPayload.COLUMN_STATS_FIELD_MAX_VALUE, dataType),
            composeColumnStatStructType(field.name, HoodieMetadataPayload.COLUMN_STATS_FIELD_NULL_COUNT, LongType))
      }
    ), targetIndexedColumns)
  }
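
  // Illustrative note (assumption, not from the original source): the min/max struct type above is
  // derived from the query literal (e.g. the right side of EqualTo) rather than from the source
  // column's type, e.g. for from_unixtime(ts, 'yyyy-MM-dd') = '2025-01-01' the stats columns are
  // typed as StringType, presumably because the expression index stores the transformed values.
  // Column name and literal here are hypothetical.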

  def loadColumnStatsIndexRecords(targetColumns: Seq[String], prunedPartitions: Option[Set[String]] = None, shouldReadInMemory: Boolean): HoodieData[HoodieMetadataColumnStats] = {
    // Read Metadata Table's Column Stats Index records into [[HoodieData]] container by
    //    - Fetching the records from CSI by key-prefixes (encoded column names)
    //    - Extracting [[HoodieMetadataColumnStats]] records
    //    - Filtering out nulls
    checkState(targetColumns.nonEmpty)

    // TODO encoding should be done internally w/in HoodieBackedTableMetadata
    val encodedTargetColumnNames = targetColumns.map(colName => new ColumnIndexID(colName).asBase64EncodedString())

    // Encode column name and partition name if partition list is available
    val keyPrefixes = if (prunedPartitions.isDefined) {
      prunedPartitions.get.map(partitionPath =>
        new PartitionIndexID(HoodieTableMetadataUtil.getPartitionIdentifier(partitionPath)).asBase64EncodedString()
      ).flatMap(encodedPartition => {
        encodedTargetColumnNames.map(encodedTargetColumn => encodedTargetColumn.concat(encodedPartition))
      })
    } else {
      encodedTargetColumnNames
    }

    val metadataRecords: HoodieData[HoodieRecord[HoodieMetadataPayload]] =
      metadataTable.getRecordsByKeyPrefixes(keyPrefixes.toSeq.asJava, HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS, shouldReadInMemory)

    val columnStatsRecords: HoodieData[HoodieMetadataColumnStats] =
      //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
      metadataRecords.map(JFunction.toJavaSerializableFunction(record => {
          toScalaOption(record.getData.getInsertValue(null, null))
            .map(metadataRecord => metadataRecord.asInstanceOf[HoodieMetadataRecord].getColumnStatsMetadata)
            .orNull
        }))
        .filter(JFunction.toJavaSerializableFunction(columnStatsRecord => columnStatsRecord != null))

    columnStatsRecords
  }

  override def invalidateCaches(): Unit = {
    cachedColumnStatsIndexViews.foreach { case (_, df) => df.unpersist() }
    cachedColumnStatsIndexViews.clear()
  }

  /**
   * Return true if metadata table is enabled and expression index metadata partition is available.
   */
  def isIndexAvailable: Boolean = {
    metadataConfig.isEnabled && metaClient.getIndexMetadata.isPresent && !metaClient.getIndexMetadata.get().getIndexDefinitions.isEmpty
  }

  private def filterQueriesWithFunctionalFilterKey(queryFilters: Seq[Expression], sourceFieldOpt: Option[String]): List[Tuple2[Expression, List[String]]] = {
    var expressionIndexQueries: List[Tuple2[Expression, List[String]]] = List.empty
    for (query <- queryFilters) {
      val attributeFetcher = (expr: Expression) => {
        expr match {
          case expression: UnaryExpression => expression.child
          case expression: FromUnixTime => expression.sec
          case other => other
        }
      }
      filterQueryWithRecordKey(query, sourceFieldOpt, attributeFetcher).foreach({
        case (exp: Expression, literals: List[String]) =>
          expressionIndexQueries = expressionIndexQueries :+ Tuple2.apply(exp, literals)
      })
    }
    expressionIndexQueries
  }
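
  // Illustrative note (not from the original source): the attributeFetcher above unwraps the indexed
  // expression so that the underlying source field can be matched, e.g. for a filter such as
  //   from_unixtime(ts) = '2025-01-01'
  // FromUnixTime is unwrapped to its 'sec' child (the 'ts' attribute), and filterQueryWithRecordKey then
  // returns the filter together with its extracted literal(s). Column name and literal are hypothetical.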

  /**
   * Searches for an index partition based on the specified index function and target column name.
   *
   * This method looks up the index definitions available in the metadata of a `metaClient` instance
   * and attempts to find an index partition where the index function and the source fields match
   * the provided arguments. If a matching index definition is found, the partition identifier for
   * that index is returned.
   *
   * @param queryFilters A sequence of `Expression` objects to analyze. Each expression should involve a single column
   *                     for the method to consider it (expressions involving multiple columns are skipped).
   * @return An `Option` containing the index partition identifier if a matching index definition is found.
   *         Returns `None` if no matching index definition is found.
   */
  private def getExpressionIndexPartitionAndLiterals(queryFilters: Seq[Expression]): Option[Tuple3[String, Expression, List[String]]] = {
    val indexDefinitions = metaClient.getIndexMetadata.get().getIndexDefinitions.asScala
    if (indexDefinitions.nonEmpty) {
      val functionDefinitions = indexDefinitions.values
        .filter(definition => MetadataPartitionType.fromPartitionPath(definition.getIndexName).equals(MetadataPartitionType.EXPRESSION_INDEX))
        .toList
      var indexPartitionAndLiteralsOpt: Option[Tuple3[String, Expression, List[String]]] = Option.empty
      functionDefinitions.foreach(indexDefinition => {
        val queryInfoOpt = extractQueryAndLiterals(queryFilters, indexDefinition)
        if (queryInfoOpt.isDefined) {
          indexPartitionAndLiteralsOpt = Option.apply(Tuple3.apply(indexDefinition.getIndexName, queryInfoOpt.get._1, queryInfoOpt.get._2))
        }
      })
      indexPartitionAndLiteralsOpt
    } else {
      Option.empty
    }
  }

  /**
   * Extracts mappings from function names to column names from a sequence of expressions.
   *
   * This method iterates over a given sequence of Spark SQL expressions and identifies expressions
   * that contain function calls corresponding to keys in the `SPARK_FUNCTION_MAP`. It supports only
   * expressions that are simple binary expressions involving a single column. If an expression contains
   * one of the functions and operates on a single column, this method maps the function name to the
   * column name.
   */
  private def extractQueryAndLiterals(queryFilters: Seq[Expression], indexDefinition: HoodieIndexDefinition): Option[(Expression, List[String])] = {
    val expressionIndexQueries = filterQueriesWithFunctionalFilterKey(queryFilters, Option.apply(indexDefinition.getSourceFields.get(0)))
    var queryAndLiteralsOpt: Option[(Expression, List[String])] = Option.empty
    expressionIndexQueries.foreach { tuple =>
      val (expr, literals) = (tuple._1, tuple._2)
      val functionNameOption = SPARK_FUNCTION_MAP.asScala.keys.find(expr.toString.contains)
      val functionName = functionNameOption.getOrElse("identity")
      if (indexDefinition.getIndexFunction.equals(functionName)) {
        val attributeFetcher = (expr: Expression) => {
          expr match {
            case expression: UnaryExpression => expression.child
            case expression: FromUnixTime => expression.sec
            case other => other
          }
        }
        if (functionName.equals(SPARK_FROM_UNIXTIME)) {
          val configuredFormat = indexDefinition.getIndexOptions.getOrDefault("format", TimestampFormatter.defaultPattern)
          if (expr.toString().contains(configuredFormat)) {
            val pruningExpr = fetchQueryWithAttribute(expr, Option.apply(indexDefinition.getSourceFields.get(0)), RecordLevelIndexSupport.getSimpleLiteralGenerator(), attributeFetcher)._1.get._1
            queryAndLiteralsOpt = Option.apply(Tuple2.apply(pruningExpr, literals))
          }
        } else {
          val pruningExpr = fetchQueryWithAttribute(expr, Option.apply(indexDefinition.getSourceFields.get(0)), RecordLevelIndexSupport.getSimpleLiteralGenerator(), attributeFetcher)._1.get._1
          queryAndLiteralsOpt = Option.apply(Tuple2.apply(pruningExpr, literals))
        }
      }
    }
    queryAndLiteralsOpt
  }

  private def loadExpressionIndexRecords(indexPartition: String, prunedPartitions: Set[String], shouldReadInMemory: Boolean): HoodieData[HoodieMetadataColumnStats] = {
    val indexDefinition = metaClient.getIndexMetadata.get().getIndexDefinitions.get(indexPartition)
    val colStatsRecords: HoodieData[HoodieMetadataColumnStats] = loadExpressionIndexForColumnsInternal(
      indexDefinition.getSourceFields.asScala.toSeq, prunedPartitions, indexPartition, shouldReadInMemory)
    //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
    colStatsRecords
  }
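
  // Illustrative note (not from the original source): for the SPARK_FROM_UNIXTIME branch in
  // extractQueryAndLiterals above, an index defined with option format = 'yyyy-MM-dd' only matches
  // filters whose string form contains that same format, e.g.
  //   from_unixtime(ts, 'yyyy-MM-dd') = '2025-01-01'   // matched when the index format is 'yyyy-MM-dd'
  //   from_unixtime(ts, 'dd-MM-yyyy') = '01-01-2025'   // skipped, the configured format does not appear in the expression
  // Column name, formats and literals here are hypothetical.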

  def loadExpressionIndexDataFrame(indexPartition: String, prunedPartitions: Set[String], shouldReadInMemory: Boolean): DataFrame = {
    val colStatsDF = {
      val indexDefinition = metaClient.getIndexMetadata.get().getIndexDefinitions.get(indexPartition)
      val colStatsRecords: HoodieData[HoodieMetadataColumnStats] = loadExpressionIndexForColumnsInternal(
        indexDefinition.getSourceFields.asScala.toSeq, prunedPartitions, indexPartition, shouldReadInMemory)
      //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
      val catalystRows: HoodieData[InternalRow] = colStatsRecords.map[InternalRow](JFunction.toJavaSerializableFunction(r => {
        val converter = AvroConversionUtils.createAvroToInternalRowConverter(HoodieMetadataColumnStats.SCHEMA$, columnStatsRecordStructType)
        converter(r).orNull
      }))

      if (shouldReadInMemory) {
        // NOTE: This will instantiate a [[Dataset]] backed by [[LocalRelation]] holding all of the rows
        //       of the transposed table in memory, facilitating execution of the subsequently chained operations
        //       on it locally (on the driver; all such operations are actually going to be performed by Spark's
        //       Optimizer)
        createDataFrameFromInternalRows(spark, catalystRows.collectAsList().asScala.toSeq, columnStatsRecordStructType)
      } else {
        createDataFrameFromRDD(spark, HoodieJavaRDD.getJavaRDD(catalystRows), columnStatsRecordStructType)
      }
    }

    colStatsDF.select(targetColumnStatsIndexColumns.map(col): _*)
  }

  private def loadExpressionIndexForColumnsInternal(targetColumns: Seq[String], prunedPartitions: Set[String], indexPartition: String, shouldReadInMemory: Boolean): HoodieData[HoodieMetadataColumnStats] = {
    // Read Metadata Table's Expression Index records into [[HoodieData]] container by
    //    - Fetching the records from CSI by key-prefixes (encoded column names)
    //    - Extracting [[HoodieMetadataColumnStats]] records
    //    - Filtering out nulls
    checkState(targetColumns.nonEmpty)
    val encodedTargetColumnNames = targetColumns.map(colName => new ColumnIndexID(colName).asBase64EncodedString())
    val keyPrefixes = if (prunedPartitions.nonEmpty) {
      prunedPartitions.map(partitionPath =>
        new PartitionIndexID(HoodieTableMetadataUtil.getPartitionIdentifier(partitionPath)).asBase64EncodedString()
      ).flatMap(encodedPartition => {
        encodedTargetColumnNames.map(encodedTargetColumn => encodedTargetColumn.concat(encodedPartition))
      })
    } else {
      encodedTargetColumnNames
    }
    val metadataRecords: HoodieData[HoodieRecord[HoodieMetadataPayload]] =
      metadataTable.getRecordsByKeyPrefixes(keyPrefixes.toSeq.asJava, indexPartition, shouldReadInMemory)
    val columnStatsRecords: HoodieData[HoodieMetadataColumnStats] =
      //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
      metadataRecords.map(JFunction.toJavaSerializableFunction(record => {
          toScalaOption(record.getData.getInsertValue(null, null))
            .map(metadataRecord => metadataRecord.asInstanceOf[HoodieMetadataRecord].getColumnStatsMetadata)
            .orNull
        }))
        .filter(JFunction.toJavaSerializableFunction(columnStatsRecord => columnStatsRecord != null))

    columnStatsRecords
  }
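
  // Illustrative note (not from the original source): metadata-table keys for these index partitions are
  // prefixed with the Base64-encoded column id, optionally concatenated with the encoded partition id,
  // so a lookup for column 'ts' restricted to partition '2025/01/01' would conceptually use a prefix like
  //   new ColumnIndexID("ts").asBase64EncodedString() +
  //     new PartitionIndexID(HoodieTableMetadataUtil.getPartitionIdentifier("2025/01/01")).asBase64EncodedString()
  // Column and partition values are hypothetical.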

  def getPrunedPartitionsAndFileNamesMap(prunedPartitionsAndFileSlices: Seq[(Option[BaseHoodieTableFileIndex.PartitionPath], Seq[FileSlice])],
                                         includeLogFiles: Boolean = false): Map[String, Set[String]] = {
    prunedPartitionsAndFileSlices.foldLeft(Map.empty[String, Set[String]]) {
      case (partitionToFileMap, (partitionPathOpt, fileSlices)) =>
        partitionPathOpt match {
          case Some(partitionPath) =>
            val fileNames = fileSlices.flatMap { fileSlice =>
              val baseFile = Option(fileSlice.getBaseFile.orElse(null)).map(_.getFileName).toSeq
              val logFiles = if (includeLogFiles) {
                fileSlice.getLogFiles.iterator().asScala.map(_.getFileName).toSeq
              } else Seq.empty[String]
              baseFile ++ logFiles
            }.toSet

            // Update the map with the new partition and its file names
            partitionToFileMap.updated(partitionPath.path, partitionToFileMap.getOrElse(partitionPath.path, Set.empty) ++ fileNames)
          case None =>
            partitionToFileMap // Skip if no partition path
        }
    }
  }

  private def getCandidateFilesForKeys(indexPartition: String, prunedPartitionAndFileNames: Map[String, Set[String]], keys: List[String]): Set[String] = {
    val candidateFiles = prunedPartitionAndFileNames.flatMap { case (partition, fileNames) =>
      fileNames.filter { fileName =>
        val bloomFilterOpt = toScalaOption(metadataTable.getBloomFilter(partition, fileName, indexPartition))
        bloomFilterOpt match {
          case Some(bloomFilter) =>
            keys.exists(bloomFilter.mightContain)
          case None =>
            true // If bloom filter is empty or undefined, assume the file might contain the record key
        }
      }
    }.toSet

    candidateFiles
  }
}

object ExpressionIndexSupport {
  val INDEX_NAME = "FUNCTIONAL"

  /**
   * Target Column Stats Index columns which internally are mapped onto fields of the corresponding
   * Column Stats record payload ([[HoodieMetadataColumnStats]]) persisted w/in Metadata Table
   */
  private val targetColumnStatsIndexColumns = Seq(
    HoodieMetadataPayload.COLUMN_STATS_FIELD_FILE_NAME,
    HoodieMetadataPayload.COLUMN_STATS_FIELD_MIN_VALUE,
    HoodieMetadataPayload.COLUMN_STATS_FIELD_MAX_VALUE,
    HoodieMetadataPayload.COLUMN_STATS_FIELD_NULL_COUNT,
    HoodieMetadataPayload.COLUMN_STATS_FIELD_VALUE_COUNT,
    HoodieMetadataPayload.COLUMN_STATS_FIELD_COLUMN_NAME
  )

  private val columnStatsRecordStructType: StructType =
    AvroConversionUtils.convertAvroSchemaToStructType(HoodieMetadataColumnStats.SCHEMA$)
}
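
// Usage sketch (illustrative only; not part of the original source). A file index would typically
// construct this support class and ask it for candidate files during data skipping, e.g.
//   val eiSupport = new ExpressionIndexSupport(spark, tableSchema, metadataConfig, metaClient)
//   if (eiSupport.isIndexAvailable) {
//     val candidates = eiSupport.computeCandidateFileNames(
//       fileIndex, queryFilters, queryReferencedColumns, prunedPartitionsAndFileSlices, shouldPushDownFilesFilter = false)
//   }
// All names other than the constructor and method parameters shown in this file are hypothetical.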



