org.apache.spark.sql.delta.stats.DeltaScan.scala Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of wasp-delta-lake_2.12 Show documentation
wasp-delta-lake
The newest version!
/*
 * Copyright (2020) The Delta Lake Project Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.delta.stats

import org.apache.spark.sql.delta.actions.AddFile
import com.fasterxml.jackson.databind.annotation.JsonDeserialize

import org.apache.spark.sql.catalyst.expressions._

/**
  * Note: Please don't add any new constructor to this class. `jackson-module-scala` always picks up
  * the first constructor returned by `Class.getConstructors` but the order of the constructors list
  * is non-deterministic. (SC-13343)
  */
case class DataSize(
  @JsonDeserialize(contentAs = classOf[java.lang.Long])
  bytesCompressed: Option[Long] = None,
  @JsonDeserialize(contentAs = classOf[java.lang.Long])
  rows: Option[Long] = None
)

object DataSize {
  def apply(a: ArrayAccumulator): DataSize = {
    DataSize(Option(a.value(0)).filterNot(_ == -1), Option(a.value(1)).filterNot(_ == -1))
  }
}

/**
  * Used to hold details the files and stats for a scan where we have already
  * applied filters and a limit.
  */
case class DeltaScan(version: Long, files: Seq[AddFile], total: DataSize, partition: DataSize, scanned: DataSize)(
  // Moved to separate argument list, to not be part of case class equals check -
  // expressions can differ by exprId or ordering, but as long as same files are scanned, the
  // PreparedDeltaFileIndex and HadoopFsRelation should be considered equal for reuse purposes.
  val partitionFilters: ExpressionSet,
  val dataFilters: ExpressionSet,
  val unusedFilters: ExpressionSet,
  val projection: AttributeSet
) {
  def allFilters: ExpressionSet = partitionFilters ++ dataFilters ++ unusedFilters
}