All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.cloudera.livy.sessions.Session.scala Maven / Gradle / Ivy

/*
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.livy.sessions

import java.io.InputStream
import java.net.{URI, URISyntaxException}
import java.security.PrivilegedExceptionAction
import java.util.UUID
import java.util.concurrent.TimeUnit

import scala.concurrent.{ExecutionContext, Future}

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.security.UserGroupInformation

import com.cloudera.livy.{LivyConf, Logging, Utils}

object Session {

  lazy val configBlackList: Set[String] = {
    val url = getClass.getResource("/spark-blacklist.conf")
    if (url != null) Utils.loadProperties(url).keySet else Set()
  }

}

abstract class Session(val id: Int, val owner: String, val livyConf: LivyConf) extends Logging {

  import Session._

  protected implicit val executionContext = ExecutionContext.global

  private var _lastActivity = System.nanoTime()

  // Directory where the session's staging files are created. The directory is only accessible
  // to the session's effective user.
  private var stagingDir: Path = null

  def lastActivity: Long = state match {
    case SessionState.Error(time) => time
    case SessionState.Dead(time) => time
    case SessionState.Success(time) => time
    case _ => _lastActivity
  }

  val timeout: Long = TimeUnit.HOURS.toNanos(1)

  def state: SessionState

  def stop(): Future[Unit] = Future {
    try {
      stopSession()
    } catch {
      case e: Exception =>
        warn(s"Error stopping session $id.", e)
    }

    try {
      if (stagingDir != null) {
        debug(s"Deleting session $id staging directory $stagingDir")
        doAsOwner {
          val fs = FileSystem.newInstance(livyConf.hadoopConf)
          try {
            fs.delete(stagingDir, true)
          } finally {
            fs.close()
          }
        }
      }
    } catch {
      case e: Exception =>
        warn(s"Error cleaning up session $id staging dir.", e)
    }
  }

  def recordActivity(): Unit = {
    _lastActivity = System.nanoTime()
  }

  def logLines(): IndexedSeq[String]

  protected def stopSession(): Unit

  protected val proxyUser: Option[String]

  protected def doAsOwner[T](fn: => T): T = {
    val user = proxyUser.getOrElse(owner)
    if (user != null) {
      val ugi = if (UserGroupInformation.isSecurityEnabled) {
        UserGroupInformation.createProxyUser(user, UserGroupInformation.getCurrentUser())
      } else {
        UserGroupInformation.createRemoteUser(user)
      }
      ugi.doAs(new PrivilegedExceptionAction[T] {
        override def run(): T = fn
      })
    } else {
      fn
    }
  }

  protected def copyResourceToHDFS(dataStream: InputStream, name: String): URI = doAsOwner {
    val fs = FileSystem.newInstance(livyConf.hadoopConf)

    try {
      val filePath = new Path(getStagingDir(fs), name)
      debug(s"Uploading user file to $filePath")

      val outFile = fs.create(filePath, true)
      val buffer = new Array[Byte](512 * 1024)
      var read = -1
      try {
        while ({read = dataStream.read(buffer); read != -1}) {
          outFile.write(buffer, 0, read)
        }
      } finally {
        outFile.close()
      }
      filePath.toUri
    } finally {
      fs.close()
    }
  }

  /**
   * Prepends the value of the "fs.defaultFS" configuration to any URIs that do not have a
   * scheme. URIs are required to at least be absolute paths.
   *
   * @throws IllegalArgumentException If an invalid URI is found in the given list.
   */
  protected def resolveURIs(uris: Seq[String]): Seq[String] = {
    val defaultFS = livyConf.hadoopConf.get("fs.defaultFS").stripSuffix("/")
    uris.filter(_.nonEmpty).map { _uri =>
      val uri = try {
        new URI(_uri)
      } catch {
        case e: URISyntaxException => throw new IllegalArgumentException(e)
      }
      resolveURI(uri).toString()
    }
  }

  protected def resolveURI(uri: URI): URI = {
    val defaultFS = livyConf.hadoopConf.get("fs.defaultFS").stripSuffix("/")
    val resolved =
      if (uri.getScheme() == null) {
        require(uri.getPath().startsWith("/"), s"Path '${uri.getPath()}' is not absolute.")
        new URI(defaultFS + uri.getPath())
      } else {
        uri
      }

    if (resolved.getScheme() == "file") {
      // Make sure the location is whitelisted before allowing local files to be added.
      require(livyConf.localFsWhitelist.find(resolved.getPath().startsWith).isDefined,
        s"Local path ${uri.getPath()} cannot be added to user sessions.")
    }

    resolved
  }

  /**
   * Validates and prepares a user-provided configuration for submission.
   *
   * - Verifies that no blacklisted configurations are provided.
   * - Merges file lists in the configuration with the explicit lists provided in the request
   * - Resolve file URIs to make sure they reference the default FS
   * - Verify that file URIs don't reference non-whitelisted local resources
   */
  protected def prepareConf(conf: Map[String, String],
      jars: Seq[String],
      files: Seq[String],
      archives: Seq[String],
      pyFiles: Seq[String]): Map[String, String] = {
    if (conf == null) {
      return Map()
    }

    val errors = conf.keySet.filter(configBlackList.contains)
    if (errors.nonEmpty) {
      throw new IllegalArgumentException(
        "Blacklisted configuration values in session config: " + errors.mkString(", "))
    }

    val confLists: Map[String, Seq[String]] = livyConf.sparkFileLists
      .map { key => (key -> Nil) }.toMap

    val userLists = confLists ++ Map(
      LivyConf.SPARK_JARS -> jars,
      LivyConf.SPARK_FILES -> files,
      LivyConf.SPARK_ARCHIVES -> archives,
      LivyConf.SPARK_PY_FILES -> pyFiles)

    val merged = userLists.flatMap { case (key, list) =>
      val confList = conf.get(key)
        .map { list =>
          resolveURIs(list.split("[, ]+").toSeq)
        }
        .getOrElse(Nil)
      val userList = resolveURIs(list)
      if (confList.nonEmpty || userList.nonEmpty) {
        Some(key -> (userList ++ confList).mkString(","))
      } else {
        None
      }
    }

    conf ++ merged
  }

  private def getStagingDir(fs: FileSystem): Path = synchronized {
    if (stagingDir == null) {
      val stagingRoot = Option(livyConf.get(LivyConf.SESSION_STAGING_DIR)).getOrElse {
        new Path(fs.getHomeDirectory(), ".livy-sessions").toString()
      }

      val sessionDir = new Path(stagingRoot, UUID.randomUUID().toString())
      fs.mkdirs(sessionDir)
      fs.setPermission(sessionDir, new FsPermission("700"))
      stagingDir = sessionDir
      debug(s"Session $id staging directory is $stagingDir")
    }
    stagingDir
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy