All downloads are free. Search and download functionality uses the official Maven repository.

tech.ytsaurus.spyt.fs.YtCachedFileSystem.scala Maven / Gradle / Ivy

The newest version!
package tech.ytsaurus.spyt.fs

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs._
import org.slf4j.LoggerFactory

import java.io._
import java.net.URI
import java.nio.file.{Files, StandardCopyOption}
import java.util.UUID
import scala.language.postfixOps

/**
 * A [[YtFileSystem]] that serves `open` calls from a local on-disk copy of the requested file.
 *
 * The first `open` of a path downloads the content through the parent file system into a local
 * cache directory; subsequent opens of the same path read the local copy. This class never
 * refreshes or evicts a cached copy once it exists.
 */
@SerialVersionUID(1L)
class YtCachedFileSystem extends YtFileSystem {
  private val log = LoggerFactory.getLogger(getClass)

  private val DIRECTORY_FOR_CACHED_FILES = "cachedDownloadedFiles"
  // Assigned in `initialize`; stays null until the file system has been initialized.
  private var cachedDirPath: java.nio.file.Path = _

  /**
   * Initializes the parent file system and resolves the local cache directory.
   *
   * The cache directory is `cachedDownloadedFiles` under `SPARK_HOME` when that environment
   * variable is set, otherwise under the JVM temporary directory (`java.io.tmpdir`).
   *
   * @param uri  file system URI, passed through to the parent implementation
   * @param conf Hadoop configuration, passed through to the parent implementation
   */
  override def initialize(uri: URI, conf: Configuration): Unit = {
    super.initialize(uri, conf)
    cachedDirPath = java.nio.file.Path.of(
      Option(System.getenv("SPARK_HOME")).getOrElse(System.getProperty("java.io.tmpdir")),
      DIRECTORY_FOR_CACHED_FILES)
    log.info(s"Path to cached files: $cachedDirPath")
  }

  /**
   * Maps a remote path to its file inside the cache directory.
   *
   * The local name is the URI path with every '/' replaced by '+', so the cache is flat.
   * NOTE: remote paths that differ only by '/' versus '+' map to the same local file.
   *
   * @param f remote path
   * @return the local file that holds (or will hold) the cached copy
   */
  private def getCachedFile(f: Path): File = {
    val filename = f.toUri.getPath.replace('/', '+')
    cachedDirPath.resolve(filename).toFile
  }

  /**
   * Copies all bytes from `in` to `out` using an 8 KiB buffer.
   *
   * @param in           stream to read until end of stream
   * @param out          stream that receives the bytes
   * @param closeStreams when true (the default), both streams are closed afterwards, whether or
   *                     not the copy succeeded; `out` is closed even if closing `in` fails
   */
  private def copyStream(in: InputStream, out: OutputStream, closeStreams: Boolean = true): Unit = {
    try {
      val buf = new Array[Byte](8192)
      var n = in.read(buf)
      while (n != -1) {
        out.write(buf, 0, n)
        n = in.read(buf)
      }
    } finally {
      if (closeStreams) {
        try {
          in.close()
        } finally {
          out.close()
        }
      }
    }
  }

  /**
   * Moves `in` to `out`, replacing `out` if it already exists.
   *
   * @param in  file to move
   * @param out destination file
   */
  private def moveFile(in: File, out: File): Unit = {
    Files.move(in.toPath, out.toPath, StandardCopyOption.REPLACE_EXISTING)
  }

  /**
   * Downloads `f` through the parent file system into `cachedFile`.
   *
   * The content is first written to a uniquely named temporary file in the cache directory and
   * then moved onto `cachedFile`, so a failed download never leaves a file under the final
   * cache name. The temporary file is removed if it still exists when this method finishes.
   *
   * @param f          remote path to download
   * @param bufferSize buffer size passed to the parent `open`
   * @param cachedFile final location of the cached copy
   * @throws IOException if the download or the move fails
   */
  private def fetchToCache(f: Path, bufferSize: Int, cachedFile: File): Unit = {
    Files.createDirectories(cachedDirPath)
    val tempFile = new File(cachedDirPath.toFile, s"caching-${UUID.randomUUID()}.tmp")
    try {
      log.info(s"Fetching ${f.toUri.toString} to $tempFile")
      val in = super.open(f, bufferSize)
      // If the local output cannot be created, the remote stream must not be leaked:
      // close it here, because copyStream (which normally closes it) is never reached.
      val out =
        try {
          new FileOutputStream(tempFile)
        } catch {
          case scala.util.control.NonFatal(e) =>
            try {
              in.close()
            } catch {
              case scala.util.control.NonFatal(closeError) => e.addSuppressed(closeError)
            }
            throw e
        }
      // copyStream closes both streams, on success and on failure.
      copyStream(in, out)
      if (!tempFile.exists()) {
        throw new IOException(s"Cached downloading ${f.toUri.toString} to $tempFile failed")
      }

      log.info(s"Copying $tempFile to $cachedFile")
      moveFile(tempFile, cachedFile)
      if (!cachedFile.exists()) {
        throw new IOException(s"Copying temp file from $tempFile to $cachedFile failed")
      }
    } finally {
      // After a successful move the temp file no longer exists; this only cleans up failures.
      if (tempFile.exists()) {
        val deleteResult = tempFile.delete()
        log.info(s"Delete temp file $tempFile status: $deleteResult")
      }
    }
  }

  /**
   * Opens `f` for reading from the local cache, downloading it first if it is not cached yet.
   *
   * @param f          remote path to open
   * @param bufferSize buffer size used for the remote read when the file has to be downloaded
   * @return an input stream over the local cached copy
   */
  override def open(f: Path, bufferSize: Int): FSDataInputStream = convertExceptions {
    log.info(s"Cached opening file ${f.toUri.toString}")
    statistics.incrementReadOps(1)
    val cachedFile = getCachedFile(f)
    if (cachedFile.exists()) {
      log.info("Reusing cached version")
    } else {
      fetchToCache(f, bufferSize, cachedFile)
    }

    new FSDataInputStream(new LocalFSInputStream(cachedFile))
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy