All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.delta.flink.internal.KernelSnapshotDelegator.scala Maven / Gradle / Ivy

There is a newer version: 3.2.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.delta.standalone.internal

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

import io.delta.kernel.internal.{SnapshotImpl => SnapshotImplKernel}
import io.delta.standalone.DeltaScan
import io.delta.standalone.actions.{AddFile => AddFileJ, Metadata => MetadataJ}
import io.delta.standalone.data.{CloseableIterator, RowRecord => RowParquetRecordJ}
import io.delta.standalone.expressions.Expression
import io.delta.standalone.internal.actions.{AddFile, Metadata, Protocol, SetTransaction}
import io.delta.standalone.internal.scan.DeltaScanImpl
import io.delta.standalone.internal.util.ConversionUtils


/**
 * This class is designed to be passed to OptimisticTransactionImpl, and provide exactly what that
 * needs to operate, but on a Kernel Snapshot rather than a standalone Snapshot.
 *
 * The methods/variables used by OptimisticTransactionImpl that we implement are:
 *  - protocolScala
 *  - metadataScala
 *  - version
 *  - getMetadata
 *
 *  Other functions that are used, we do not implement, but fall back to standalone. These functions
 *  are only called in "exceptional" cases, so should not overly impact performance.
 *  They include:
 *  - scanScala (only called in markFilesAsRead, not used by flink)
 *  - transactions (only used in txnVersion, which is used only on first commit for a flink app)
 *    - This is a val, but it calls setTransactionsScala, so we log for that
 *  - numOfFiles (only used in verifySchemaCompatibility, which happens only when a metadata
 *    update occurs)
 *  - allFilesScala (only used in verifySchemaCompatibility)
 *
 *  The remaining public read APIs (`scan`, `getAllFiles`, `open`) are intentionally unsupported
 *  and throw an `UnsupportedOperationException`.
 *
 * @param kernelSnapshot the Kernel snapshot that supplies the protocol and the per-application
 *                       transaction versions
 * @param kernelSnapshotWrapper wrapper that supplies the metadata and the version
 * @param hadoopConf forwarded to the [[SnapshotImpl]] constructor
 * @param path forwarded to the [[SnapshotImpl]] constructor; also used to build the empty log
 *             segment handed to it
 * @param version the version exposed through the overridden `version` member
 * @param kernelDeltaLog not referenced inside this class
 * @param standaloneDeltaLog standalone log used to load the fallback snapshot on demand
 */
class KernelSnapshotDelegator(
    kernelSnapshot: SnapshotImplKernel,
    // This needs to be an argument to the constructor since the constructor of SnapshotImpl
    // might call back into things like `metadataScala`, and this needs to be already
    // initialized for that
    kernelSnapshotWrapper: KernelSnapshotWrapper,
    hadoopConf: Configuration,
    path: Path,
    override val version: Long,
    kernelDeltaLog: KernelDeltaLogDelegator,
    standaloneDeltaLog: DeltaLogImpl)
  extends SnapshotImpl(hadoopConf, path, -1, LogSegment.empty(path), -1, standaloneDeltaLog, -1) {

  /**
   * Standalone snapshot for the version reported by `getVersion`. It is lazy so that nothing is
   * loaded from the standalone log unless one of the fallback methods below is actually called.
   */
  lazy val standaloneSnapshot: SnapshotImpl =
    standaloneDeltaLog.getSnapshotForVersionAsOf(getVersion())

  /**
   * Internal vals we need to override
   */

  /** Protocol rebuilt from the reader/writer versions reported by the Kernel snapshot. */
  override lazy val protocolScala: Protocol = {
    val kernelProtocol = kernelSnapshot.getProtocol()
    new Protocol(kernelProtocol.getMinReaderVersion(), kernelProtocol.getMinWriterVersion())
  }

  /** Metadata obtained from the Kernel wrapper, converted to the internal Scala representation. */
  override lazy val metadataScala: Metadata =
    ConversionUtils.convertMetadataJ(kernelSnapshotWrapper.getMetadata())

  /**
   * Provides a path to use the faster txn lookup in kernel.
   *
   * @param id the application id whose latest transaction version is requested
   * @return the version reported by the Kernel snapshot, or `None` when it reports none
   */
  def getLatestTransactionVersion(id: String): Option[Long] = {
    val latestVersion = kernelSnapshot.getLatestTransactionVersion(id)
    if (latestVersion.isPresent) Some(latestVersion.get) else None
  }

  // Public APIs
  override def getMetadata: MetadataJ = kernelSnapshotWrapper.getMetadata()
  override def getVersion: Long = kernelSnapshotWrapper.getVersion()

  // Internal APIs that fall back to the standalone snapshot. Each one logs when it is called so
  // that we can verify these don't get used often.
  override def scanScala(): DeltaScanImpl = {
    logInfo("Calling scanScala on KernelSnapshotDelegator")
    standaloneSnapshot.scanScala()
  }

  override def setTransactionsScala: Seq[SetTransaction] = {
    logInfo("Calling setTransactionsScala on KernelSnapshotDelegator")
    standaloneSnapshot.setTransactionsScala
  }

  override def numOfFiles: Long = {
    logInfo("Calling numOfFiles on KernelSnapshotDelegator")
    standaloneSnapshot.numOfFiles
  }

  override def allFilesScala: Seq[AddFile] = {
    logInfo("Calling allFilesScala on KernelSnapshotDelegator")
    standaloneSnapshot.allFilesScala
  }

  // Throw for the following, as we don't expect flink to use them. UnsupportedOperationException
  // is a RuntimeException, so callers catching RuntimeException are unaffected; the message names
  // the operation so an unexpected call is easy to diagnose.
  override def scan(): DeltaScan =
    throw new UnsupportedOperationException(
      "scan() is not supported by KernelSnapshotDelegator")

  override def scan(predicate: Expression): DeltaScan =
    throw new UnsupportedOperationException(
      "scan(predicate) is not supported by KernelSnapshotDelegator")

  override def getAllFiles: java.util.List[AddFileJ] =
    throw new UnsupportedOperationException(
      "getAllFiles is not supported by KernelSnapshotDelegator")

  override def open(): CloseableIterator[RowParquetRecordJ] =
    throw new UnsupportedOperationException(
      "open() is not supported by KernelSnapshotDelegator")
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy