All downloads are free. Search and download functionality uses the official Maven repository.

tech.ytsaurus.spyt.wrapper.table.YtTableUtils.scala Maven / Gradle / Ivy

The newest version!
package tech.ytsaurus.spyt.wrapper.table

import org.slf4j.LoggerFactory
import tech.ytsaurus.spyt.wrapper.Utils
import tech.ytsaurus.spyt.wrapper.YtJavaConverters.RichJavaMap
import tech.ytsaurus.spyt.wrapper.cypress.YtCypressUtils
import tech.ytsaurus.spyt.wrapper.operation.OperationStatus
import tech.ytsaurus.spyt.wrapper.transaction.YtTransactionUtils
import tech.ytsaurus.client.CompoundClient
import tech.ytsaurus.client.request._
import tech.ytsaurus.client.rows.WireRowDeserializer
import tech.ytsaurus.core.{DataSize, GUID}
import tech.ytsaurus.core.cypress.{CypressNodeType, YPath}
import tech.ytsaurus.rpcproxy.EOperationType
import tech.ytsaurus.ysontree.{YTreeBuilder, YTreeNode, YTreeTextSerializer}

import java.nio.ByteBuffer
import java.nio.file.Paths
import scala.annotation.tailrec
import scala.concurrent.duration._
import scala.language.postfixOps

/**
 * Table-level helpers working through a `CompoundClient`: creating tables, reading them
 * (row by row or as an Arrow byte stream), merging and partitioning.
 *
 * The self-type requires this trait to be mixed in together with `YtCypressUtils` and
 * `YtTransactionUtils`. Several helpers used below (`createDir`, `formatPath`, `listDir`)
 * are not defined here and are expected to come from that self-type.
 */
trait YtTableUtils {
  self: YtCypressUtils with YtTransactionUtils =>

  private val log = LoggerFactory.getLogger(getClass)

  /**
   * Creates a table described by `settings`, creating its parent directory first.
   *
   * @param path           path of the table to create
   * @param settings       table settings; only `settings.options` is used here
   * @param transaction    optional transaction id passed to both the directory and table creation
   * @param ignoreExisting forwarded to the table creation request; the parent directory is
   *                       always created with `ignoreExisting = true`
   */
  def createTable(path: String,
                  settings: YtTableSettings,
                  transaction: Option[String] = None,
                  ignoreExisting: Boolean = false)
                 (implicit yt: CompoundClient): Unit = {
    // getParent returns null when the path has no parent component; skip createDir then.
    Option(Paths.get(path).getParent).foreach { parentDir =>
      createDir(parentDir.toString, transaction, ignoreExisting = true)
    }
    createTable(path, settings.options, transaction, ignoreExisting = ignoreExisting)
  }

  /**
   * Shorthand for the settings-based `createTable` with no transaction; `ignoreExisting`
   * takes that overload's default.
   */
  def createTable(path: String,
                  settings: YtTableSettings)
                 (implicit yt: CompoundClient): Unit =
    createTable(path, settings, None)

  /**
   * Sends a `CreateNode` request of type `TABLE` and blocks until it completes.
   *
   * @param path           table path; passed through `formatPath` before use
   * @param options        node attributes set on the request
   * @param transaction    optional transaction id applied via `optionalTransaction`
   * @param ignoreExisting value of the request's `ignoreExisting` flag
   */
  def createTable(path: String,
                  options: Map[String, YTreeNode],
                  transaction: Option[String],
                  ignoreExisting: Boolean)
                 (implicit yt: CompoundClient): Unit = {
    import scala.collection.JavaConverters._

    log.debug(s"Create table: $path, transaction: $transaction")

    val createRequest = CreateNode.builder()
      .setPath(YPath.simple(formatPath(path)))
      .setType(CypressNodeType.TABLE)
      .setAttributes(options.asJava)
      .setIgnoreExisting(ignoreExisting)
      .optionalTransaction(transaction)

    yt.createNode(createRequest).join()
  }

  /**
   * Opens a table reader and wraps it in a `TableIterator`.
   *
   * The request is built with `omitInaccessibleColumns = true` and `unordered = true`.
   *
   * @param path            table path to read
   * @param deserializer    row deserializer used for the read serialization context
   * @param timeout         passed to the resulting `TableIterator`
   * @param transaction     optional transaction id applied via `optionalTransaction`
   * @param reportBytesRead callback passed to the resulting `TableIterator`
   */
  def readTable[T](path: YPath, deserializer: WireRowDeserializer[T], timeout: Duration = 1 minute,
                   transaction: Option[String] = None, reportBytesRead: Long => Unit = _ => ())
                  (implicit yt: CompoundClient): TableIterator[T] = {
    // The builder chain is kept as one expression so its type parameter is inferred as before.
    val readRequest = ReadTable.builder()
      .setPath(path)
      .setSerializationContext(new ReadSerializationContext(deserializer))
      .setOmitInaccessibleColumns(true)
      .setUnordered(true)
      .optionalTransaction(transaction)

    val tableReader = yt.readTable(readRequest).join()
    new TableIterator(tableReader, timeout, reportBytesRead)
  }

  /**
   * Opens a table reader using the binary Arrow serialization context and exposes it as a
   * `YtArrowInputStream` (a `TableCopyByteStream` over the reader).
   *
   * @param path            table path given as a string
   * @param timeout         passed to the resulting stream
   * @param transaction     optional transaction id applied via `optionalTransaction`
   * @param reportBytesRead callback passed to the resulting stream
   */
  def readTableArrowStream(path: String, timeout: Duration, transaction: Option[String],
                           reportBytesRead: Long => Unit)
                          (implicit yt: CompoundClient): YtArrowInputStream = {
    val arrowRequest = ReadTable.builder[ByteBuffer]()
      .setPath(path)
      .setSerializationContext(ReadSerializationContext.binaryArrow())
      .optionalTransaction(transaction)

    val arrowReader = yt.readTable(arrowRequest).join()
    new TableCopyByteStream(arrowReader, timeout, reportBytesRead)
  }

  /**
   * `YPath` variant of `readTableArrowStream`: converts the path with `toString` and
   * delegates to the string-based overload.
   */
  def readTableArrowStream(path: YPath, timeout: Duration = 1 minute,
                           transaction: Option[String] = None,
                           reportBytesRead: Long => Unit = _ => {})
                          (implicit yt: CompoundClient): YtArrowInputStream =
    readTableArrowStream(path.toString, timeout, transaction, reportBytesRead)

  /**
   * Appends a map with the keys `user`, `command`, `hostname`, `pid` and `wrapper_version`
   * to `builder` and returns the builder produced by the chain.
   */
  private def startedBy(builder: YTreeBuilder): YTreeBuilder = {
    import tech.ytsaurus.spyt.BuildInfo

    val userName = System.getProperty("user.name")
    val hostname = Utils.ytHostnameOrIpAddress
    val processId = ProcessHandle.current().pid()
    val wrapperVersion = BuildInfo.ytClientVersion

    builder
      .beginMap()
      .key("user").value(userName)
      // The command list holds the single literal entry "command".
      .key("command").beginList().value("command").endList()
      .key("hostname").value(hostname)
      .key("pid").value(processId)
      .key("wrapper_version").value(wrapperVersion)
      .endMap()
  }

  /**
   * Converts `path` (after `formatPath`) into a tree node, adding a `transaction_id`
   * attribute when a transaction is given.
   */
  private def pathToTree(path: String, transaction: Option[String]): YTreeNode = {
    val plainPath = YPath.simple(formatPath(path))
    val resolvedPath = transaction match {
      case Some(transactionId) => plainPath.plusAdditionalAttribute("transaction_id", transactionId)
      case None => plainPath
    }
    resolvedPath.toTree
  }

  /** Builds a list node containing `pathToTree(path, transaction)` for each path, in order. */
  private def ysonPaths(paths: Seq[String], transaction: Option[String]): YTreeNode = {
    val emptyList = new YTreeBuilder().beginList()
    val filledList = paths.foldLeft(emptyList) { (listBuilder, tablePath) =>
      listBuilder.value(pathToTree(tablePath, transaction))
    }
    filledList.endList().build()
  }

  /**
   * Fetches the operation and returns its `result` entry serialized as text, or
   * `"unknown"` when that entry is absent.
   */
  private def operationResult(guid: GUID)(implicit yt: CompoundClient): String = {
    val operationInfo = yt.getOperation(new GetOperation(guid)).join()
    // A plain Java map lookup returns null for a missing key; Option(...) absorbs that.
    Option(operationInfo.asMap().get("result"))
      .fold("unknown")(resultNode => YTreeTextSerializer.serialize(resultNode))
  }

  /**
   * Polls the operation state until the status reports `isFinished`, sleeping five seconds
   * between polls, and returns the final status.
   */
  @tailrec
  private def awaitOperation(guid: GUID)(implicit yt: CompoundClient): OperationStatus = {
    val operationInfo = yt.getOperation(new GetOperation(guid)).join()
    val stateName = operationInfo.asMap().getOrThrow("state").stringValue()
    val status = OperationStatus.getByName(stateName)

    if (!status.isFinished) {
      log.info(s"Operation $guid is in status $status")
      Thread.sleep(5.seconds.toMillis)
      awaitOperation(guid)
    } else {
      status
    }
  }

  /**
   * Starts a merge operation and blocks until it finishes.
   *
   * The inputs are `dstTable` itself followed by every entry listed under `srcDir`; the
   * output is `dstTable`. Input and output paths are built without a transaction attribute;
   * the transaction is attached to the operation request instead.
   *
   * @param srcDir      directory whose entries are merged
   * @param dstTable    destination table, also used as the first input
   * @param sorted      selects merge mode `"sorted"` when true, `"unordered"` otherwise
   * @param transaction optional transaction id used for listing `srcDir` and for the operation
   * @param specParams  extra key/value pairs appended to the operation spec
   * @throws IllegalStateException if the final operation status is not successful
   */
  def mergeTables(srcDir: String, dstTable: String,
                  sorted: Boolean,
                  transaction: Option[String] = None,
                  specParams: Map[String, YTreeNode] = Map.empty)
                 (implicit yt: CompoundClient): Unit = {
    log.debug(s"Merge tables: $srcDir -> $dstTable, transaction: $transaction")

    val sourceTables = listDir(srcDir, transaction).map(name => s"$srcDir/$name")
    val srcList = dstTable +: sourceTables
    val mergeMode = if (sorted) "sorted" else "unordered"

    val operationSpec = new YTreeBuilder()
      .beginMap()
      .key("mode").value(mergeMode)
      .key("combine_chunks").value(false)
      .key("started_by").apply(startedBy)
      .key("input_table_paths").value(ysonPaths(srcList, None))
      .key("output_table_path").value(pathToTree(dstTable, None))
      .apply(specBuilder => specParams.foldLeft(specBuilder) { case (acc, (paramName, paramValue)) =>
        acc.key(paramName).value(paramValue)
      })
      .endMap()
      .build()

    val mergeRequest = new StartOperation(EOperationType.OT_MERGE, operationSpec).toBuilder
      .optionalTransaction(transaction)
    val operationId = yt.startOperation(mergeRequest).join()

    val finalStatus = awaitOperation(operationId)
    if (!finalStatus.isSuccess) {
      throw new IllegalStateException(s"Merge operation finished with unsuccessful status $finalStatus, " +
        s"result is ${operationResult(operationId)}")
    }
  }


  /**
   * Requests an ordered partitioning of a single table and returns the resulting
   * partitions as a Scala list.
   *
   * @param path       table to partition
   * @param splitBytes data size, in bytes, passed to the partitioning request
   */
  def splitTables(path: YPath, splitBytes: Long)(implicit yt: CompoundClient): Seq[MultiTablePartition] = {
    import scala.collection.JavaConverters._

    val tablePaths = java.util.List.of[YPath](path)
    val partitionSize = DataSize.fromBytes(splitBytes)
    val partitionRequest = new PartitionTables(tablePaths, PartitionTablesMode.Ordered, partitionSize)

    yt.partitionTables(partitionRequest).join().asScala.toList
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy