All downloads are free. The search and download features use the official Maven repository.

tech.ytsaurus.spyt.wrapper.dyntable.YtDynTableUtils.scala Maven / Gradle / Ivy

The newest version!
package tech.ytsaurus.spyt.wrapper.dyntable

import org.slf4j.LoggerFactory
import tech.ytsaurus.spyt.wrapper.YtJavaConverters._
import tech.ytsaurus.spyt.wrapper.YtWrapper.createTable
import tech.ytsaurus.spyt.wrapper.cypress.{YtAttributes, YtCypressUtils}
import tech.ytsaurus.spyt.wrapper.table.YtTableSettings
import tech.ytsaurus.client.request.{GetTablePivotKeys, ModifyRowsRequest, ReshardTable, SelectRowsRequest}
import tech.ytsaurus.client.rows.UnversionedRowset
import tech.ytsaurus.client.{ApiServiceTransaction, CompoundClient, RetryPolicy}
import tech.ytsaurus.client.rpc.AlwaysSwitchRpcFailoverPolicy
import tech.ytsaurus.core.cypress.YPath
import tech.ytsaurus.core.tables.TableSchema
import tech.ytsaurus.ysontree.{YTreeBinarySerializer, YTreeBuilder, YTreeMapNode, YTreeNode}

import java.io.ByteArrayOutputStream
import java.time.{Duration => JDuration}
import java.util.concurrent.{CompletableFuture, Executors, ThreadFactory}
import scala.annotation.tailrec
import scala.collection.mutable
import scala.concurrent.TimeoutException
import scala.concurrent.duration._
import scala.language.postfixOps
import scala.util.{Failure, Success, Try}

trait YtDynTableUtils {
  self: YtCypressUtils =>

  // Logger is looked up via getClass, i.e. for the runtime class this trait is mixed into.
  private val log = LoggerFactory.getLogger(getClass)

  /** A pivot key in serialised form (see `serialiseYson`). */
  type PivotKey = Array[Byte]

  /** The serialised form of an empty YSON list. */
  val emptyPivotKey: PivotKey = serialiseYson(new YTreeBuilder().beginList().endList().build())

  // Single-threaded executor whose only thread is a daemon thread; it is the executor
  // handed to `retryWithTabletTransaction` in `runWithRetry`.
  private val executor = {
    val daemonThreads: ThreadFactory = (task: Runnable) => {
      val worker: Thread = Executors.defaultThreadFactory().newThread(task)
      worker.setDaemon(true)
      worker
    }
    Executors.newSingleThreadExecutor(daemonThreads)
  }

  /**
   * Serialises a YSON tree node with `YTreeBinarySerializer`.
   *
   * @param node node to serialise
   * @return the bytes written by the serializer
   */
  def serialiseYson(node: YTreeNode): Array[Byte] = {
    val buffer = new ByteArrayOutputStream()
    try {
      YTreeBinarySerializer.serialize(node, buffer)
      buffer.toByteArray
    } finally buffer.close()
  }

  /**
   * Requests the pivot keys of the table at `path` and returns them as YSON nodes.
   *
   * Only `path.justPath()` is sent in the request, and keys are requested in list
   * representation. The call blocks until the client future completes.
   *
   * @param path table path
   * @return pivot keys as returned by the client
   */
  def pivotKeysYson(path: YPath)(implicit yt: CompoundClient): Seq[YTreeNode] = {
    import scala.collection.JavaConverters._
    log.debug(s"Get pivot keys for $path")
    val request = GetTablePivotKeys.builder()
      .setPath(path.justPath().toString)
      .setRepresentKeyAsList(true)
      .build()
    yt.getTablePivotKeys(request).join().asScala
  }

  /** String-path overload: formats `path`, wraps it with `YPath.simple` and delegates. */
  def pivotKeys(path: String)(implicit yt: CompoundClient): Seq[PivotKey] = {
    val tablePath = YPath.simple(formatPath(path))
    pivotKeys(tablePath)
  }

  /** Pivot keys of the table at `path`, each one serialised with `serialiseYson`. */
  def pivotKeys(path: YPath)(implicit yt: CompoundClient): Seq[PivotKey] = {
    for (key <- pivotKeysYson(path)) yield serialiseYson(key)
  }

  /**
   * Reads the `YtAttributes.keyColumns` attribute of `path` and converts it to column names.
   *
   * @param path        node whose attribute is read
   * @param transaction optional transaction id forwarded to `attribute`
   */
  def keyColumns(path: YPath, transaction: Option[String] = None)(implicit yt: CompoundClient): Seq[String] = {
    val keyColumnsAttr = attribute(path, YtAttributes.keyColumns, transaction)
    keyColumns(keyColumnsAttr)
  }

  /** Interprets `attr` as a YSON list and returns the string value of each element. */
  def keyColumns(attr: YTreeNode): Seq[String] = {
    import scala.collection.JavaConverters._
    for (column <- attr.asList().asScala) yield column.stringValue()
  }

  /**
   * Sends a mount request for the table at `path` and blocks until the client future completes.
   * Use `mountTableSync` to additionally poll for the mounted tablet state.
   */
  def mountTable(path: String)(implicit yt: CompoundClient): Unit = {
    log.debug(s"Mount table: $path")
    val mountRequest = yt.mountTable(formatPath(path))
    mountRequest.join()
  }

  /**
   * Mounts the table at `path` and polls until its tablet state is `Mounted` or `timeout` expires.
   *
   * This method does not throw on timeout (unchanged behaviour). The outcome of the wait used to
   * be dropped silently; it is now logged as a warning so that a table which never reached the
   * mounted state can be diagnosed.
   *
   * @param path    table path
   * @param timeout maximum time to wait for the mounted state
   */
  def mountTableSync(path: String, timeout: Duration = 20 seconds)(implicit yt: CompoundClient): Unit = {
    mountTable(path)
    waitState(path, TabletState.Mounted, timeout) match {
      case Success(_) => ()
      case Failure(cause) =>
        log.warn(s"Table $path did not reach the mounted state within $timeout", cause)
    }
  }

  /**
   * Unmounts the table at `path` and polls until its tablet state is `Unmounted` or `timeout` expires.
   *
   * This method does not throw on timeout (unchanged behaviour). The outcome of the wait used to
   * be dropped silently; it is now logged as a warning so that a table which never reached the
   * unmounted state can be diagnosed.
   *
   * @param path    table path
   * @param timeout maximum time to wait for the unmounted state
   */
  def unmountTableSync(path: String, timeout: Duration = 20 seconds)(implicit yt: CompoundClient): Unit = {
    unmountTable(path)
    waitState(path, TabletState.Unmounted, timeout) match {
      case Success(_) => ()
      case Failure(cause) =>
        log.warn(s"Table $path did not reach the unmounted state within $timeout", cause)
    }
  }

  /**
   * Sends an unmount request for the table at `path` and blocks until the client future completes.
   * Use `unmountTableSync` to additionally poll for the unmounted tablet state.
   */
  def unmountTable(path: String)(implicit yt: CompoundClient): Unit = {
    log.debug(s"Unmount table: $path")
    val unmountRequest = yt.unmountTable(formatPath(path))
    unmountRequest.join()
  }

  /**
   * Java-duration variant of `waitState`: converts `timeout` with `toScalaDuration`, delegates to
   * the Scala-duration overload and rethrows the failure instead of returning a `Try`.
   */
  def waitState(path: String, state: TabletState, timeout: JDuration)
               (implicit yt: CompoundClient): Unit = {
    val scalaTimeout = toScalaDuration(timeout)
    waitState(path, state, scalaTimeout) match {
      case Success(_) => ()
      case Failure(cause) => throw cause
    }
  }

  /**
   * Polls the tablet state of `path` until it equals `state` or the time budget is used up.
   *
   * The state is checked immediately and then after each pause of at most one second. A pause is
   * never longer than the remaining budget, so a sub-second timeout is no longer overshot by a
   * full second. Only the pauses count against the budget; time spent inside `tabletState` does
   * not. Exceptions thrown by `tabletState` or `Thread.sleep` propagate to the caller.
   *
   * @param path    table path
   * @param state   state to wait for
   * @param timeout time budget; converted with `toMillis`
   * @return `Success(())` once the state is observed, otherwise `Failure(TimeoutException)`
   */
  def waitState(path: String, state: TabletState, timeout: Duration)
               (implicit yt: CompoundClient): Try[Unit] = {
    val pollIntervalMillis = 1000L

    @tailrec
    def poll(remainingMillis: Long): Try[Unit] = {
      if (tabletState(path) == state) {
        // Explicit unit value: `Success()` relies on deprecated argument adaptation.
        Success(())
      } else if (remainingMillis > 0) {
        val pauseMillis = math.min(pollIntervalMillis, remainingMillis)
        Thread.sleep(pauseMillis)
        poll(remainingMillis - pauseMillis)
      } else {
        Failure(new TimeoutException(
          s"Table $path did not reach tablet state '${state.name}' within $timeout"))
      }
    }

    poll(timeout.toMillis)
  }

  /**
   * True when `path` exists and its `dynamic` attribute is present and true.
   * The attributes are only fetched if the node exists.
   */
  def isDynamicTable(path: String)(implicit yt: CompoundClient): Boolean = {
    if (exists(path)) {
      val dynamicFlag = attributes(path, None, Set.empty[String]).get("dynamic")
      dynamicFlag.exists(_.boolValue())
    } else {
      false
    }
  }

  /** True when `path` exists and is mounted; the mount state is only queried for existing nodes. */
  def isDynTablePrepared(path: String)(implicit yt: CompoundClient): Boolean = {
    if (exists(path)) isMounted(path) else false
  }

  /** True when the tablet state reported for `path` is `TabletState.Mounted`. */
  def isMounted(path: String)(implicit yt: CompoundClient): Boolean = {
    tabletState(path) match {
      case TabletState.Mounted => true
      case _ => false
    }
  }

  /**
   * Makes sure a dynamic table exists at `path` and is mounted.
   *
   *  - Missing table: it is created with `createDynTable` and then mounted.
   *  - Existing but unmounted table: it is only mounted.
   *  - Existing and mounted table: nothing is done, unless `ignoreExisting` is false.
   *
   * @param path           table path
   * @param schema         schema used when the table has to be created
   * @param settings       extra creation options forwarded to `createDynTable`
   * @param ignoreExisting when false, an existing AND mounted table is an error; an existing
   *                       table that is not mounted is still just mounted
   * @throws RuntimeException if the table is already mounted and `ignoreExisting` is false
   */
  def createDynTableAndMount(path: String,
                             schema: TableSchema,
                             settings: Map[String, Any] = Map.empty,
                             ignoreExisting: Boolean = true)
                            (implicit yt: CompoundClient): Unit = {
    val alreadyThere = exists(path)
    // The mount state is only queried for an existing table.
    val alreadyMounted = alreadyThere && isMounted(path)

    if (alreadyMounted) {
      if (!ignoreExisting) throw new RuntimeException("Table already exists")
    } else {
      if (!alreadyThere) createDynTable(path, schema, settings)
      mountTableSync(path)
    }
  }

  // FIFO of the most recently prepared table paths, capped at cachedCreatedTablesMaxSize entries.
  // NOTE(review): the queue is a plain mutable collection with no synchronisation - confirm
  // whether this method can be called concurrently.
  private val cachedCreatedTables = mutable.Queue.empty[String]
  private val cachedCreatedTablesMaxSize = 10

  /**
   * Same as `createDynTableAndMount`, but skipped entirely for paths that are still in the
   * recent-paths queue. After a successful call the path is appended, and the oldest entry is
   * dropped once the queue exceeds its maximum size.
   */
  def createDynTableAndMountCached(path: String,
                                   schema: TableSchema,
                                   settings: Map[String, Any] = Map.empty,
                                   ignoreExisting: Boolean = true)
                                  (implicit yt: CompoundClient): Unit = {
    val recentlyPrepared = cachedCreatedTables.contains(path)
    if (!recentlyPrepared) {
      createDynTableAndMount(path, schema, settings, ignoreExisting)
      cachedCreatedTables.enqueue(path)
      val overCapacity = cachedCreatedTables.size > cachedCreatedTablesMaxSize
      if (overCapacity) {
        cachedCreatedTables.dequeue()
      }
    }
  }

  /**
   * Creates a table at `path` via `createTable`, using `schema` and the given `settings` with the
   * `dynamic` option set to the string "true" (overriding any `dynamic` entry in `settings`).
   */
  def createDynTable(path: String, schema: TableSchema, settings: Map[String, Any] = Map.empty)(implicit yt: CompoundClient): Unit = {
    val dynamicTableSettings: YtTableSettings = new YtTableSettings {
      override def ytSchema: YTreeNode = schema.toYTree

      override def optionsAny: Map[String, Any] = settings.updated("dynamic", "true")
    }
    createTable(path, dynamicTableSettings)
  }

  /**
   * Runs a select query and returns the result rows as YSON map nodes.
   *
   * Before querying, the method polls for up to 60 seconds for `path` to be mounted. The outcome
   * of that wait is not inspected, so the query is attempted either way.
   *
   * @param query       query text passed to `SelectRowsRequest.of`
   * @param path        table whose mount state is awaited
   * @param transaction if given, the query runs in it directly; otherwise via `runWithRetry`
   */
  private def selectRowsRequest(query: String, path: String,
                                transaction: Option[ApiServiceTransaction] = None)
                               (implicit yt: CompoundClient): Seq[YTreeMapNode] = {
    import scala.collection.JavaConverters._
    val request = SelectRowsRequest.of(query)

    waitState(path, TabletState.Mounted, 60 seconds)

    // Each attempt waits at most 10 minutes for the rowset.
    val select: ApiServiceTransaction => UnversionedRowset =
      tx => tx.selectRows(request).get(10, MINUTES)
    val rowset = transaction match {
      case Some(tx) => select(tx)
      case None => runWithRetry(select)
    }
    rowset.getYTreeRows.asScala.toList
  }

  /**
   * Selects rows from the dynamic table at `path`.
   *
   * @param path        table path; formatted with `formatPath` inside the query
   * @param condition   optional predicate, appended after `where` verbatim
   * @param transaction optional transaction to run the query in
   * @param columns     columns to select, joined with ", "; `*` when empty
   */
  def selectRows(path: String, condition: Option[String] = None,
                 transaction: Option[ApiServiceTransaction] = None,
                 columns: Seq[String] = Nil)(implicit yt: CompoundClient): Seq[YTreeMapNode] = {
    val projection = if (columns.isEmpty) "*" else columns.mkString(", ")
    val source = formatPath(path)
    val filter = condition.fold("")("where " + _)
    selectRowsRequest(s"$projection from [$source] $filter", path, transaction)
  }

  /**
   * Counts the rows of the dynamic table at `path` with a `SUM(1) ... group by 1` query.
   *
   * @param condition   optional predicate, appended after `where` verbatim
   * @param transaction optional transaction to run the query in
   * @return the `count` column of the first result row, or 0 when the query returns no rows
   */
  def countRows(path: String, condition: Option[String] = None,
                 transaction: Option[ApiServiceTransaction] = None)(implicit yt: CompoundClient): Long = {
    val source = formatPath(path)
    val filter = condition.fold("")("where " + _)
    val query = s"SUM(1) as count from [$source] $filter group by 1"
    val rows = selectRowsRequest(query, path, transaction)
    rows.headOption.fold(0L)(_.getLong("count"))
  }

  /**
   * Executes a modify-rows request, waiting at most one minute for each attempt.
   *
   * @param request     request to execute
   * @param transaction if given, the request runs in it directly; otherwise via `runWithRetry`
   */
  private def processModifyRowsRequest(request: ModifyRowsRequest,
                                       transaction: Option[ApiServiceTransaction] = None)
                                      (implicit yt: CompoundClient): Unit = {
    val modify: ApiServiceTransaction => Unit = tx => tx.modifyRows(request).get(1, MINUTES)
    transaction match {
      case Some(tx) => modify(tx)
      case None => runWithRetry(modify)
    }
  }

  /**
   * Runs `f` through the client's `retryWithTabletTransaction` and blocks for the result.
   *
   * `f` itself is started with `CompletableFuture.supplyAsync` (no explicit executor). The
   * trait's single-thread executor is passed to the client call. The retry policy is
   * attempt-limited with limit 3 on top of `AlwaysSwitchRpcFailoverPolicy`.
   *
   * @param f action to run against the transaction supplied by the client
   * @return the value produced by `f`
   */
  def runWithRetry[T](f: ApiServiceTransaction => T)(implicit yt: CompoundClient): T = {
    val retryPolicy =
      RetryPolicy.attemptLimited(3, RetryPolicy.fromRpcFailoverPolicy(new AlwaysSwitchRpcFailoverPolicy))
    yt.retryWithTabletTransaction(
      tx => CompletableFuture.supplyAsync(() => f(tx)),
      executor,
      retryPolicy
    ).join()
  }

  /**
   * Inserts `rows` (Java lists of column values) into the dynamic table at `path`.
   *
   * @param schema            schema set on the modify request
   * @param parentTransaction if given, the insert runs in it; otherwise in a retried transaction
   */
  def insertRows(path: String, schema: TableSchema, rows: java.util.List[java.util.List[Any]],
                 parentTransaction: Option[ApiServiceTransaction])(implicit yt: CompoundClient): Unit = {
    val insertRequest = ModifyRowsRequest.builder()
      .setPath(formatPath(path))
      .setSchema(schema)
      .addInserts(rows)
      .build()
    processModifyRowsRequest(insertRequest, parentTransaction)
  }

  /**
   * Scala-collection variant of `insertRows`: converts every row to a Java list and delegates to
   * the Java-list overload, which builds and executes the modify request.
   */
  def insertRows(path: String, schema: TableSchema, rows: Seq[Seq[Any]],
                 parentTransaction: Option[ApiServiceTransaction] = None)(implicit yt: CompoundClient): Unit = {
    import scala.collection.JavaConverters._
    val javaRows: java.util.List[java.util.List[Any]] = rows.map(_.asJava).asJava
    insertRows(path, schema, javaRows, parentTransaction)
  }

  /**
   * Adds a single update, described by the column-name to value `map`, to a modify request for
   * the table at `path` and executes it.
   *
   * @param parentTransaction if given, the update runs in it; otherwise in a retried transaction
   */
  def updateRow(path: String, schema: TableSchema, map: java.util.Map[String, Any],
                parentTransaction: Option[ApiServiceTransaction] = None)(implicit yt: CompoundClient): Unit = {
    val updateRequest = ModifyRowsRequest.builder()
      .setPath(formatPath(path))
      .setSchema(schema)
      .addUpdate(map)
      .build()
    processModifyRowsRequest(updateRequest, parentTransaction)
  }

  /**
   * Adds a single delete, described by the column-name to value `map`, to a modify request for
   * the table at `path` and executes it.
   *
   * @param parentTransaction if given, the delete runs in it; otherwise in a retried transaction
   */
  def deleteRow(path: String, schema: TableSchema, map: java.util.Map[String, Any],
                parentTransaction: Option[ApiServiceTransaction] = None)(implicit yt: CompoundClient): Unit = {
    val deleteRequest = ModifyRowsRequest.builder()
      .setPath(formatPath(path))
      .setSchema(schema)
      .addDelete(map)
      .build()
    processModifyRowsRequest(deleteRequest, parentTransaction)
  }

  /**
   * Deletes several rows in one modify request: one `addDelete` per element of `rows`, in order.
   * The request is built and executed even when `rows` is empty.
   *
   * @param parentTransaction if given, the deletes run in it; otherwise in a retried transaction
   */
  def deleteRows(path: String, schema: TableSchema, rows: Seq[java.util.Map[String, Any]],
                parentTransaction: Option[ApiServiceTransaction] = None)(implicit yt: CompoundClient): Unit = {
    val builder = ModifyRowsRequest.builder()
      .setPath(formatPath(path))
      .setSchema(schema)
    for (row <- rows) {
      builder.addDelete(row)
    }
    processModifyRowsRequest(builder.build(), parentTransaction)
  }

  /**
   * Reads the `YtAttributes.tabletState` attribute of `path` and maps its string value to a
   * `TabletState` (`Unknown` for any value other than the mounted/unmounted names).
   */
  def tabletState(path: String)(implicit yt: CompoundClient): TabletState = {
    val rawState = attribute(formatPath(path), YtAttributes.tabletState).stringValue()
    TabletState.fromString(rawState)
  }

  /** Sends a remount request for the table at `path` and blocks until the client future completes. */
  def remountTable(path: String)(implicit yt: CompoundClient): Unit = {
    val remountRequest = yt.remountTable(formatPath(path))
    remountRequest.join()
  }

  /**
   * Returns a timestamp for `path`, chosen by whether dynamic store read is enabled:
   *  - enabled: a freshly generated timestamp from the client;
   *  - disabled: the table's `unflushed_timestamp` attribute minus one.
   *
   * @param transaction optional transaction id used for the attribute reads
   */
  def maxAvailableTimestamp(path: YPath, transaction: Option[String] = None)
                           (implicit yt: CompoundClient): Long = {
    val dynamicStoreReadEnabled = isDynamicStoreReadEnabled(path, transaction)
    if (!dynamicStoreReadEnabled) {
      attribute(path, "unflushed_timestamp", transaction).longValue() - 1
    } else {
      yt.generateTimestamps().join().getValue
    }
  }

  /** Boolean value of the `enable_dynamic_store_read` attribute of `path`. */
  def isDynamicStoreReadEnabled(path: YPath, transaction: Option[String] = None)
                               (implicit yt: CompoundClient): Boolean = {
    val dynamicStoreReadAttr = attribute(path, "enable_dynamic_store_read", transaction)
    dynamicStoreReadAttr.boolValue()
  }

  /**
   * Reshards the table at `path` with the given pivot keys and blocks until the request completes.
   *
   * @param schema    schema set on the reshard request
   * @param pivotKeys pivot keys, each a sequence of values; added to the request in order
   */
  def reshardTable(path: String, schema: TableSchema, pivotKeys: Seq[Seq[Any]])
                  (implicit yt: CompoundClient): Unit = {
    import scala.collection.JavaConverters._
    val builder = ReshardTable.builder()
      .setPath(YPath.simple(formatPath(path)))
      .setSchema(schema)
    for (pivotKey <- pivotKeys) {
      builder.addPivotKey(pivotKey.asJava)
    }
    val reshardRequest = builder.build()
    yt.reshardTable(reshardRequest).join()
  }

  /** Tablet state of a dynamic table; `name` is the string form matched by `fromString`. */
  sealed abstract class TabletState(val name: String)

  object TabletState {

    case object Mounted extends TabletState("mounted")

    case object Unmounted extends TabletState("unmounted")

    /** Fallback for every string that is neither "mounted" nor "unmounted". */
    case object Unknown extends TabletState("")

    /**
     * Maps a state string to its `TabletState`. Only the names of `Mounted` and `Unmounted`
     * are recognised; anything else (including the empty string) yields `Unknown`.
     */
    def fromString(str: String): TabletState = str match {
      case Mounted.name => Mounted
      case Unmounted.name => Unmounted
      case _ => Unknown
    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy