All downloads are free. The search and download features use the official Maven repository.

tech.ytsaurus.spyt.fs.eventlog.YtEventLogFsOutputStream.scala Maven / Gradle / Ivy

The newest version!
package tech.ytsaurus.spyt.fs.eventlog

import org.apache.hadoop.conf.Configuration
import org.slf4j.LoggerFactory
import tech.ytsaurus.spyt.fs.conf._
import tech.ytsaurus.spyt.fs.PathUtils.getMetaPath
import tech.ytsaurus.spyt.wrapper.YtWrapper
import tech.ytsaurus.spyt.wrapper.model.EventLogSchema.{metaSchema, schema}
import tech.ytsaurus.client.{ApiServiceTransaction, CompoundClient}

import java.io.OutputStream
import java.time.Clock
import java.util.UUID
import scala.annotation.tailrec

/**
 * `OutputStream` that stores the written bytes in a YT dynamic table at `path` as rows of at
 * most `rowSize` bytes ("blocks"), and keeps one metadata row per stream in the table at
 * `getMetaPath(path)`.
 *
 * Bytes are accumulated in an in-memory buffer of `rowSize` bytes. A flush either inserts a new
 * block row or, when the previous flush stored only a part of the current block, updates that
 * row with the longer prefix. Every block write also refreshes the metadata row inside the same
 * `YtWrapper.runWithRetry` body.
 *
 * Failure handling is best-effort: non-fatal errors are logged and the affected bytes are
 * dropped instead of failing the caller. If initialization fails, every later `write`, `flush`
 * and `close` is a no-op. Fatal errors (as classified by `scala.util.control.NonFatal`, e.g.
 * `OutOfMemoryError` or `InterruptedException`) are not swallowed and propagate to the caller.
 *
 * @param conf     Hadoop configuration; `yt.dynTable.rowSize` (default `16777216`) is the block
 *                 size in bytes, and the options read via
 *                 `getConfWithPrefix("eventLog.extraTableOpts")` are passed to table creation
 * @param path     path of the dynamic table that receives the data blocks
 * @param fileName file name recorded in the metadata row of this stream
 * @param clock    source of the timestamp stored in the metadata row
 * @param yt       YT client, exposed as an implicit (presumably picked up by the `YtWrapper`
 *                 calls below - confirm against `YtWrapper`)
 */
class YtEventLogFsOutputStream(conf: Configuration, path: String, fileName: String, clock: Clock, implicit val yt: CompoundClient) extends OutputStream {
  import scala.util.control.NonFatal

  private val log = LoggerFactory.getLogger(getClass)

  private val metaPath = getMetaPath(path)
  private val rowSize = conf.get("yt.dynTable.rowSize", "16777216").toInt
  private val buffer: Array[Byte] = new Array[Byte](rowSize)

  /**
   * Immutable snapshot of the stream position.
   *
   * @param bufferPos        number of bytes of the current block held in `buffer`
   * @param lastFlushPos     value of `bufferPos` at the last successful flush; equals `rowSize`
   *                         when that flush stored a complete block (and in the initial state)
   * @param flushedDataSize  total number of bytes stored by successful flushes
   * @param blockCount       number of block rows inserted so far; also the order of the
   *                         current block
   * @param blockUpdateCount number of in-place updates applied to the current block row
   */
  private case class State(bufferPos: Int,
                           lastFlushPos: Int,
                           flushedDataSize: Long,
                           blockCount: Int,
                           blockUpdateCount: Int) {
    /** True when the buffer holds exactly `rowSize` bytes. */
    def currentBlockIsFull(): Boolean = {
      bufferPos == rowSize
    }

    /** True when the last flush stored only a prefix of the current block. */
    def prevFlushWasPartial(): Boolean = {
      lastFlushPos < rowSize
    }

    /** Buffer position to continue from after flushing: 0 for a full block, unchanged otherwise. */
    def nextBlockStartPos: Int = {
      if (currentBlockIsFull()) {
        0
      } else {
        bufferPos
      }
    }

    /** State after appending `length` bytes to the buffer. */
    def incBufferPos(length: Int = 1): State = {
      copy(bufferPos = bufferPos + length)
    }

    /** Number of buffered bytes that have not been stored by a flush yet. */
    def newDataSize: Int = {
      val dataStartPos = if (prevFlushWasPartial()) lastFlushPos else 0
      bufferPos - dataStartPos
    }

    /** True when there is at least one buffered byte that has not been flushed. */
    def hasNewData: Boolean = {
      newDataSize > 0
    }

    /**
     * State to use while (and after) storing the buffered data.
     *
     * @param startsNewBlock true when a new block row is inserted, false when the current block
     *                       row is updated in place
     */
    def flushed(startsNewBlock: Boolean): State = {
      State(
        bufferPos = nextBlockStartPos,
        lastFlushPos = bufferPos,
        flushedDataSize = flushedDataSize + newDataSize,
        blockCount = if (startsNewBlock) blockCount + 1 else blockCount,
        blockUpdateCount = if (startsNewBlock) 0 else blockUpdateCount + 1
      )
    }
  }

  // lastFlushPos starts at rowSize so that the very first flush inserts a new block.
  private var state = State(0, rowSize, 0, 0, 0)
  private val id = s"${UUID.randomUUID()}"
  private var started = false

  // Must stay after the field initializers above: open() reads `state` and `id` and sets
  // `started`, which a later initializer would otherwise overwrite.
  open()

  /**
   * Creates and mounts the data and metadata tables and writes the initial metadata row.
   * On a non-fatal failure the error is logged and the stream stays disabled.
   */
  def open(): Unit = {
    try {
      val extraTableOpts = conf.getConfWithPrefix("eventLog.extraTableOpts")
      YtWrapper.createDynTableAndMount(path, schema, extraTableOpts)
      YtWrapper.createDynTableAndMount(metaPath, metaSchema, extraTableOpts)
      YtWrapper.runWithRetry(transaction => updateInfo(Some(transaction)))
      started = true
    } catch {
      case NonFatal(e) => log.error("Initialization failed, all logs will be lost", e)
    }
  }

  /** Buffers a single byte; ignored when initialization failed. */
  override def write(b: Int): Unit = {
    if (started) {
      writeImpl(b)
    }
  }

  /** Buffers `len` bytes of `event` starting at `off`; ignored when initialization failed. */
  override def write(event: Array[Byte], off: Int, len: Int): Unit = {
    if (started) {
      writeImpl(event, off, len)
    }
  }

  /** Appends one byte, flushing first when the buffer is full. */
  @tailrec
  private def writeImpl(b: Int): Unit = {
    if (state.currentBlockIsFull()) {
      if (flushImpl()) {
        writeImpl(b)
      } // else loss of events
    } else {
      buffer(state.bufferPos) = b.toByte
      state = state.incBufferPos()
    }
  }

  /** Copies as much as fits into the buffer, flushes the full block and continues with the rest. */
  @tailrec
  private def writeImpl(event: Array[Byte], off: Int, len: Int): Unit = {
    val free = rowSize - state.bufferPos
    val copyLen = Math.min(free, len)
    System.arraycopy(event, off, buffer, state.bufferPos, copyLen)
    state = state.incBufferPos(copyLen)
    if (free < len) {
      if (flushImpl()) {
        writeImpl(event, off + free, len - free)
      } // else loss of events
    }
  }

  /**
   * Runs `f`; on a non-fatal failure logs the error and restores the state captured before `f`.
   *
   * @return true when `f` completed, false when it failed and the state was rolled back
   */
  private def tryWithConsistentState(f: => Unit): Boolean = {
    val prevState = state
    try {
      f
      true
    } catch {
      case NonFatal(e) =>
        // Logged before the rollback so that the message carries the order of the failed block.
        log.error(s"Error while uploading logs with id=$id and order=${state.blockCount}", e)
        state = prevState
        false
    }
  }

  /** Inserts `data` as a new block row; returns whether the upload succeeded. */
  private def writeNewBlock(data: Array[Byte]): Boolean = tryWithConsistentState {
    // The state is advanced first because both the block row and the metadata row read from it.
    state = state.flushed(startsNewBlock = true)
    updateWithRetry { transaction =>
      YtWrapper.insertRows(
        path, schema, List(new YtEventLogBlock(id, state.blockCount, data).toList), Some(transaction)
      )
    }
  }

  /** Rewrites the row of the current block with `data`; returns whether the upload succeeded. */
  private def updateBlock(data: Array[Byte]): Boolean = tryWithConsistentState {
    // The state is advanced first because both the block row and the metadata row read from it.
    state = state.flushed(startsNewBlock = false)
    updateWithRetry { transaction =>
      YtWrapper.updateRow(
        path, schema, new YtEventLogBlock(id, state.blockCount, data).toJavaMap, Some(transaction)
      )
    }
  }

  /**
   * Runs `code` followed by a metadata update inside `YtWrapper.runWithRetry`, and logs when the
   * body had to be invoked more than once.
   */
  private def updateWithRetry(code: ApiServiceTransaction => Unit): Unit = {
    var tryCount = 0
    YtWrapper.runWithRetry { transaction =>
      tryCount += 1
      code(transaction)
      updateInfo(Some(transaction))
    }
    if (tryCount > 1) {
      log.info(s"UUID: $id (filename: $fileName) was updated using $tryCount tries")
    }
  }

  /**
   * Stores the buffered data if there is anything new.
   *
   * A partially flushed block is updated in place at most 3 times unless the flush is `forced`
   * or the block has become full; further partial flushes are skipped until then.
   *
   * @param forced bypasses the limit on in-place updates of a partial block
   * @return true when data was uploaded, false when nothing was uploaded or the upload failed
   */
  private def flushImpl(forced: Boolean = false): Boolean = {
    if (state.hasNewData) {
      val data =
        if (state.currentBlockIsFull()) buffer
        else buffer.slice(0, state.bufferPos)
      if (state.prevFlushWasPartial()) {
        if (state.blockUpdateCount < 3 || forced || state.currentBlockIsFull()) {
          updateBlock(data)
        } else {
          false
        }
      } else {
        writeNewBlock(data)
      }
    } else {
      false
    }
  }

  /** Uploads buffered data subject to the partial-update limit; ignored when initialization failed. */
  override def flush(): Unit = {
    if (started) {
      flushImpl()
    }
  }

  /** Writes the metadata row (row size, block count, flushed size, current time) for this stream. */
  private def updateInfo(transaction: Option[ApiServiceTransaction] = None): Unit = {
    YtWrapper.updateRow(
      metaPath,
      metaSchema,
      new YtEventLogFileDetails(
        fileName, id, new YtEventLogFileMeta(rowSize, state.blockCount, state.flushedDataSize, clock.millis())
      ).toJavaMap,
      transaction
    )
  }

  /** Forces a final flush of the buffered data; ignored when initialization failed. */
  override def close(): Unit = {
    if (started) {
      flushImpl(forced = true)
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy