All downloads are free. Search and download functionality uses the official Maven repository.

tech.ytsaurus.spyt.fs.eventlog.YtEventLogFileSystem.scala Maven / Gradle / Ivy

The newest version!
package tech.ytsaurus.spyt.fs.eventlog

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs._
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.util.Progressable
import org.slf4j.LoggerFactory
import tech.ytsaurus.spyt.fs.conf._
import tech.ytsaurus.spyt.fs.PathUtils.{getMetaPath, hadoopPathToYt}
import tech.ytsaurus.spyt.fs.YtClientConfigurationConverter.ytClientConfiguration
import tech.ytsaurus.spyt.wrapper.{LogLazy, YtWrapper}
import tech.ytsaurus.spyt.wrapper.client.{YtClientConfiguration, YtClientProvider, YtRpcClient}
import tech.ytsaurus.spyt.wrapper.cypress.PathType
import tech.ytsaurus.spyt.wrapper.model.EventLogSchema.Key._
import tech.ytsaurus.spyt.wrapper.model.EventLogSchema._
import tech.ytsaurus.client.{ApiServiceTransaction, CompoundClient}

import java.io.FileNotFoundException
import java.net.URI
import java.time.Clock
import java.util
import java.util.UUID
import scala.util.{Failure, Success, Try}

/**
 * Hadoop [[FileSystem]] that presents event logs stored in YT dynamic tables as files.
 *
 * A Hadoop path `parent/name` is interpreted as "file `name` inside the table at `parent`"
 * (see [[splitTablePath]]). Every data table has a companion meta table, located via
 * `getMetaPath`, holding one row per file keyed by `FILENAME`; the file's content rows live in
 * the data table and are keyed by `ID` and `ORDER`.
 */
class YtEventLogFileSystem extends FileSystem with LogLazy {
  /** Random identifier of this instance; passed to `YtClientProvider` when the client is obtained. */
  val id: String = UUID.randomUUID().toString

  private val log = LoggerFactory.getLogger(getClass)

  private var _uri: URI = _
  private var _workingDirectory: Path = new Path("/")
  protected var _ytConf: YtClientConfiguration = _
  // Lazy because it depends on `_ytConf`, which is only assigned in `initialize`.
  protected lazy val yt: CompoundClient = YtClientProvider.ytClient(_ytConf, id)

  // Handed to every output stream created by `create`; replaceable through `setClock`.
  private var clock = Clock.systemUTC()

  /** Replaces the clock that is passed to output streams created afterwards. */
  private[eventlog] def setClock(clock: Clock): Unit = {
    this.clock = clock
  }

  /**
   * Stores the URI and derives the YT client configuration from `conf`.
   *
   * @param uri  URI returned later by [[getUri]]
   * @param conf Hadoop configuration the YT client configuration is built from
   */
  override def initialize(uri: URI, conf: Configuration): Unit = {
    super.initialize(uri, conf)
    setConf(conf)
    _uri = uri
    _ytConf = ytClientConfiguration(getConf)
  }

  /**
   * Opens an output stream for file `f`, creating the directory that contains its table if needed.
   *
   * If a file with the same name is already registered in the meta table, its data rows are
   * deleted after the new stream has been constructed. The `permission`, `bufferSize`,
   * `replication`, `blockSize` and `progress` arguments are not used.
   *
   * @throws FileAlreadyExistsException if `overwrite` is false and `f` already exists
   */
  override def create(f: Path, permission: FsPermission, overwrite: Boolean, bufferSize: Int,
                      replication: Short, blockSize: Long, progress: Progressable): FSDataOutputStream = {
    val (tablePath, fullTableName) = splitTablePath(f)
    val ytTablePath = hadoopPathToYt(tablePath)

    if (!overwrite && exists(f)) {
      throw new FileAlreadyExistsException(s"File $f already exists")
    }

    YtWrapper.createDir(hadoopPathToYt(tablePath.getParent), None, ignoreExisting = true)(yt)

    // Look up the previous incarnation BEFORE the new stream is constructed, so that the rows
    // removed below are the old file's rows (identified by the old id).
    val oldDetails = getFileDetailsImpl(ytTablePath, fullTableName)

    statistics.incrementWriteOps(1)
    val out = new FSDataOutputStream(
      new YtEventLogFsOutputStream(getConf, ytTablePath, fullTableName, clock, yt), statistics)

    oldDetails.foreach { previous =>
      YtWrapper.runWithRetry(transaction => {
        deleteAllRowsWithId(ytTablePath, previous.id, previous.meta.blocksCnt, Some(transaction))
      })(yt)
    }

    out
  }

  /**
   * Splits a file path into the path of the table that holds it and the file name.
   *
   * @return `(f.getParent, f.getName)`
   */
  def splitTablePath(f: Path): (Path, String) = {
    (f.getParent, f.getName)
  }

  /** True when the node at `f` exists and its tablet state is `Mounted`. */
  private def isCreatedAndMounted(f: Path): Boolean = {
    val ytPath = hadoopPathToYt(f)
    YtWrapper.exists(ytPath)(yt) && YtWrapper.tabletState(ytPath)(yt) == YtWrapper.TabletState.Mounted
  }

  /**
   * True when a non-null status can be obtained for `f`.
   *
   * Any non-fatal failure while resolving the status (not only "not found") yields `false`.
   */
  override def exists(f: Path): Boolean = {
    log.debugLazy(s"Exists $f")
    getFileStatusEither(f).toOption.exists(_ != null)
  }

  /** True when both the data table at `f` and its meta table exist and are mounted. */
  def existsTable(f: Path): Boolean = {
    isCreatedAndMounted(f) && isCreatedAndMounted(new Path(getMetaPath(f)))
  }

  override def getUri: URI = _uri

  /**
   * Opens file `f` for reading. `bufferSize` is not used.
   *
   * @throws IllegalArgumentException if the file is not registered in the meta table
   */
  override def open(f: Path, bufferSize: Int): FSDataInputStream = {
    log.debugLazy(s"Open $f")
    val (tablePath, fullTableName) = splitTablePath(f)
    val ytTablePath = hadoopPathToYt(tablePath)
    getFileDetailsImpl(ytTablePath, fullTableName) match {
      case Some(details) =>
        new FSDataInputStream(new YtEventLogFsInputStream(getConf, ytTablePath, details, yt))
      case None =>
        throw new IllegalArgumentException(s"No such file found: $f")
    }
  }

  /**
   * Appending is not supported by this file system.
   *
   * @throws UnsupportedOperationException always
   */
  override def append(f: Path, bufferSize: Int, progress: Progressable): FSDataOutputStream = {
    throw new UnsupportedOperationException(s"Append is not supported by ${getClass.getSimpleName}: $f")
  }

  /**
   * Renames a file within one table by replacing its meta row inside a single transaction.
   *
   * @return true if the source file was found and renamed, false if it is not registered
   * @throws IllegalArgumentException if `src` and `dst` have different parent tables
   */
  override def rename(src: Path, dst: Path): Boolean = {
    implicit val ytClient: CompoundClient = yt

    val (srcTablePath, srcName) = splitTablePath(src)
    val (dstTablePath, dstName) = splitTablePath(dst)
    if (srcTablePath != dstTablePath) {
      throw new IllegalArgumentException("Renaming doesn't support different parent tables")
    }

    // Both files share the parent table, hence also the meta table.
    val ytTablePath = hadoopPathToYt(srcTablePath)
    val ytMetaPath = hadoopPathToYt(getMetaPath(srcTablePath))

    // NOTE(review): an existing meta row for `dstName` is not checked for here — confirm whether
    // overwriting it (and leaving its data rows behind) is intended.
    YtWrapper.runWithRetry(transaction => {
      getFileDetailsImpl(ytTablePath, srcName, Some(transaction)) match {
        case Some(details) =>
          YtWrapper.deleteRow(ytMetaPath, metaSchema,
            util.Map.of(FILENAME, srcName), Some(transaction))
          YtWrapper.insertRows(ytMetaPath, metaSchema,
            List(details.copy(fileName = dstName).toList), Some(transaction))
          true
        case None =>
          false
      }
    })
  }

  /**
   * Deletes the data rows `(id, 1)` through `(id, blocksCnt)` from the table at `path`.
   *
   * @param transaction transaction forwarded to every `deleteRow` call
   */
  private def deleteAllRowsWithId(path: String, id: String, blocksCnt: Int, transaction: Option[ApiServiceTransaction]): Unit = {
    implicit val ytClient: CompoundClient = yt
    for (order <- 1 to blocksCnt) {
      YtWrapper.deleteRow(path, schema,
        util.Map.of(ID, id, ORDER, order), transaction)
    }
  }

  /**
   * Deletes file `f`: its meta row and all of its data rows, within a single transaction.
   * The `recursive` flag is not used.
   *
   * @return true if the file was registered and has been deleted, false otherwise
   */
  override def delete(f: Path, recursive: Boolean): Boolean = {
    log.debugLazy(s"Delete $f")
    implicit val ytClient: CompoundClient = yt

    val (tablePath, fullTableName) = splitTablePath(f)
    val tablePathStr = hadoopPathToYt(tablePath)
    val metaPath = getMetaPath(tablePathStr)
    YtWrapper.runWithRetry(transaction => {
      getFileDetailsImpl(tablePathStr, fullTableName, Some(transaction)) match {
        case Some(details) =>
          YtWrapper.deleteRow(metaPath, metaSchema, util.Map.of(FILENAME, fullTableName), Some(transaction))
          deleteAllRowsWithId(tablePathStr, details.id, details.meta.blocksCnt, Some(transaction))
          true
        case None =>
          false
      }
    })
  }

  /**
   * Lists the files registered in the meta table of the table at `f`.
   *
   * @throws IllegalArgumentException if `f` is not a table, or if the table or its meta table
   *                                  is missing or not mounted
   */
  override def listStatus(f: Path): Array[FileStatus] = {
    log.debugLazy(s"List status $f")
    implicit val ytClient: CompoundClient = yt

    val ytPath = hadoopPathToYt(f)
    val metaPath = getMetaPath(ytPath)

    YtWrapper.pathType(ytPath, None) match {
      case PathType.Table =>
        if (!existsTable(f)) {
          throw new IllegalArgumentException(s"Corrupted table found at $f")
        }
        YtWrapper.selectRows(metaPath, None).map { row =>
          val details = YtEventLogFileDetails(row)
          // FileStatus(length, isDirectory, blockReplication, blockSize, modificationTime, path)
          new FileStatus(
            details.meta.length, false, 1, 0,
            details.meta.modificationTs, new Path(f, details.fileName))
        }.toArray
      case _ => throw new IllegalArgumentException(s"Can't list $f")
    }
  }

  override def setWorkingDirectory(new_dir: Path): Unit = {
    _workingDirectory = new_dir
  }

  override def getWorkingDirectory: Path = _workingDirectory

  /**
   * Creates the parent directory of `f` if needed, then creates and mounts the data table at `f`
   * and its meta table. Extra table options are read from the configuration under the
   * `eventLog.extraTableOpts` prefix. `permission` is not used.
   *
   * @return always true
   */
  override def mkdirs(f: Path, permission: FsPermission): Boolean = {
    implicit val ytClient: CompoundClient = yt
    YtWrapper.createDir(hadoopPathToYt(f.getParent), ignoreExisting = true)
    val path = hadoopPathToYt(f)
    val extraTableOpts = getConf.getConfWithPrefix("eventLog.extraTableOpts")
    YtWrapper.createDynTableAndMount(path, schema, extraTableOpts)
    YtWrapper.createDynTableAndMount(getMetaPath(path), metaSchema, extraTableOpts)
    true
  }

  /**
   * Looks up the meta row of `fileName` in the meta table that belongs to the table at `path`.
   *
   * @param path        YT path of the data table
   * @param fileName    file name to look up
   * @param transaction optional transaction the select is executed in
   * @return the file details, or `None` if the meta table does not exist or has no such row
   * @throws RuntimeException if more than one meta row matches `fileName`
   */
  def getFileDetailsImpl(path: String,
                         fileName: String,
                         transaction: Option[ApiServiceTransaction] = None): Option[YtEventLogFileDetails] = {
    log.debugLazy(s"Get details $path, $fileName")
    implicit val ytClient: CompoundClient = yt
    val metaPath = getMetaPath(path)
    if (!YtWrapper.exists(metaPath)) {
      None
    } else {
      // NOTE(review): `fileName` is interpolated into the filter unescaped; a name containing a
      // double quote would produce a malformed condition — confirm whether such names can occur.
      val selectedRows = YtWrapper.selectRows(metaPath,
        Some(s"""$FILENAME="$fileName""""), transaction)
      selectedRows match {
        case Nil => None
        case meta :: Nil => Some(YtEventLogFileDetails(meta))
        case many => throw new RuntimeException(
          s"Meta table $metaPath has a few rows with file_name=$fileName: ${many.map(YtEventLogFileDetails(_))}")
      }
    }
  }

  /**
   * Resolves the status of `f`, capturing failures in a [[Try]].
   *
   *  - Parent is a table: `f` is a file inside it; its length and modification time come from
   *    the meta row.
   *  - Parent is a directory and `f` itself is a table: `f` is reported as a directory.
   *  - Any other existing node: the result is `Success(null)`.
   *  - Missing parent, missing node, missing file or an unmounted/incomplete table:
   *    `Failure(FileNotFoundException)`.
   */
  private def getFileStatusEither(f: Path): Try[FileStatus] = Try {
    implicit val ytClient: CompoundClient = yt

    val (tablePath, fullTableName) = splitTablePath(f)
    val tablePathStr = hadoopPathToYt(tablePath)

    if (!YtWrapper.exists(tablePathStr)) {
      throw new FileNotFoundException(s"Path $tablePathStr doesn't exist")
    }

    YtWrapper.pathType(tablePathStr, None) match {
      case PathType.Table =>
        if (!existsTable(tablePath)) {
          throw new FileNotFoundException(s"Corrupted table found at $f")
        }
        getFileDetailsImpl(tablePathStr, fullTableName) match {
          case Some(details) =>
            new FileStatus(
              details.meta.length, false, 1, 0, details.meta.modificationTs, f
            )
          case None =>
            throw new FileNotFoundException(s"File $fullTableName doesn't exist in $tablePathStr")
        }
      case PathType.Directory =>
        val fStr = hadoopPathToYt(f)
        if (!YtWrapper.exists(fStr)) {
          throw new FileNotFoundException(s"Path $fStr doesn't exist")
        }
        YtWrapper.pathType(fStr, None) match {
          case PathType.Table =>
            new FileStatus(0, true, 1, 0, YtWrapper.modificationTimeTs(fStr), f)
          case _ => null
        }
      case _ => null
    }
  }

  /**
   * Returns the status of `f`, rethrowing whatever failure occurred while resolving it.
   *
   * NOTE(review): the result is `null` when `f` exists but is neither a file inside a table nor
   * a table inside a directory; callers must be prepared for that.
   *
   * @throws FileNotFoundException if `f`, its parent or its meta information cannot be found
   */
  override def getFileStatus(f: Path): FileStatus = {
    log.debugLazy(s"Get file status $f")

    getFileStatusEither(f) match {
      case Success(status) => status
      case Failure(e) => throw e
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy