All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tech.ytsaurus.spyt.fs.eventlog.YtEventLogFsInputStream.scala Maven / Gradle / Ivy

The newest version!
package tech.ytsaurus.spyt.fs.eventlog

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FSInputStream
import org.slf4j.LoggerFactory
import tech.ytsaurus.spyt.wrapper.YtWrapper
import tech.ytsaurus.spyt.wrapper.YtWrapper.RichLogger
import tech.ytsaurus.spyt.wrapper.model.EventLogSchema.Key._
import tech.ytsaurus.client.CompoundClient

import java.io.IOException
import scala.annotation.tailrec

class YtEventLogFsInputStream(conf: Configuration, path: String, details: YtEventLogFileDetails, implicit val yt: CompoundClient) extends FSInputStream {
  private val log = LoggerFactory.getLogger(getClass)

  private val singleReadLimit = conf.get("fs.ytEventLog.singleReadLimit", "268435456").toInt
  private var finished = false
  private var manualClosed = false

  private var order: Int = _
  private var globalPos: Long = _
  private var posInCurrentBlock: Int = _
  private var currentBlock: Array[Byte] = _

  resetToStreamStart()

  private def resetToStreamStart(): Unit = {
    order = 0
    globalPos = 0
    posInCurrentBlock = 0
    currentBlock = new Array[Byte](0)
  }

  private def countAvailableInBuffer(): Int = {
    currentBlock.length - posInCurrentBlock
  }

  @tailrec
  override final def seek(pos: Long): Unit = {
    checkClosed()
    if (finished) return
    if (pos < 0) throw new IllegalArgumentException("Negative seek position")
    else if (getPos < pos) {
      val available = countAvailableInBuffer()
      val needRead = pos - getPos
      if (available >= needRead) {
        movePos(needRead.toInt)
      } else {
        movePos(available)
        val left = needRead - available
        val skipBlocksCnt = left / details.meta.rowSize
        order += skipBlocksCnt.toInt
        globalPos += skipBlocksCnt * details.meta.rowSize
        loadNewBlocks()
        seek(pos)
      }
    } else if (getPos > pos) {
      resetToStreamStart()
      seek(pos)
    }
  }

  override def getPos: Long = globalPos

  override def seekToNewSource(targetPos: Long): Boolean = ???

  private def loadNewBlocks(count: Int = 1): Unit = {
    val rows = {
      try {
        YtWrapper.runWithRetry {
          transaction =>
            YtWrapper.selectRows(
              path,
              Some(s"""$ID="${details.id}" and $ORDER > $order and $ORDER <= ${order + count}"""),
              Some(transaction)
            )
        }
      } catch {
        case e: Throwable =>
          log.error(s"Logs(id=$ID, $order YtEventLogBlock(x))
    order += count
    currentBlock = new Array[Byte](blocks.map(_.log.length).sum)
    blocks.foldLeft(0) {
      case (index, next) =>
        System.arraycopy(next.log, 0, currentBlock, index, next.log.length)
        index + next.log.length
    }

    posInCurrentBlock = 0
    if (currentBlock.isEmpty) {
      finished = true
    }
  }

  def ready(): Boolean = {
    !manualClosed && countAvailableInBuffer() > 0
  }

  override def read(): Int = {
    checkClosed()
    if (finished) return -1
    if (!ready()) {
      loadNewBlocks()
    }
    if (ready()) {
      movePos(1)
      currentBlock(posInCurrentBlock - 1)
    } else {
      -1
    }
  }

  @tailrec
  private def readImpl(b: Array[Byte], off: Int, len: Int, acc: Int): Int = {
    if (finished) return acc
    val copied = copyAvailable(b, off, len)
    if (copied == len) {
      acc + copied
    } else {
      val newOff = off + copied
      val newLen = len - copied
      loadNewBlocks(Math.min(singleReadLimit / details.meta.rowSize, newLen / details.meta.rowSize + 1))
      readImpl(b, newOff, newLen, acc + copied)
    }
  }

  override def read(b: Array[Byte], off: Int, len: Int): Int = {
    checkClosed()
    val res = readImpl(b, off, len, 0)
    if (res > 0) {
      res
    } else {
      -1
    }
  }

  override def skip(n: Long): Long = {
    log.debugLazy(s"Skip $n started; details: $details; is file finished: $finished; " +
      s"read blocks: $order; current position: $globalPos")
    if (finished && n > 0) {
      throw new IOException(s"Try to skip $n in $details, but data is finished")
    } else {
      val oldPos = getPos
      seek(n + oldPos)
      getPos - oldPos
    }
  }

  def copyAvailable(b: Array[Byte], off: Int, len: Int): Int = {
    val willBeCopied = Math.min(countAvailableInBuffer(), len)
    System.arraycopy(currentBlock, posInCurrentBlock, b, off, willBeCopied)
    movePos(willBeCopied)
    willBeCopied
  }

  private def movePos(count: Int): Unit = {
    globalPos += count
    posInCurrentBlock += count
  }

  private def checkClosed(): Unit = {
    if (manualClosed) {
      throw new IOException("Reading from closed stream")
    }
  }

  override def close(): Unit = {
    log.debugLazy(s"Close $path, ${details.fileName}")
    finished = true
    manualClosed = true
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy