// scalastyle:off

/* NSC -- new Scala compiler
 * Copyright 2005-2013 LAMP/EPFL
 * @author  Martin Odersky
 */

package org.apache.spark.repl

import java.io.File

import scala.tools.nsc._
import scala.tools.nsc.backend.JavaPlatform
import scala.tools.nsc.interpreter._

import Predef.{ println => _, _ }
import scala.tools.nsc.util.{MergedClassPath, stringFromWriter, ScalaClassLoader, stackTraceString}
import scala.reflect.internal.util._
import java.net.URL
import scala.sys.BooleanProp
import io.{AbstractFile, PlainFile, VirtualDirectory}

import reporters._
import symtab.Flags
import scala.reflect.internal.Names
import scala.tools.util.PathResolver
import ScalaClassLoader.URLClassLoader
import scala.tools.nsc.util.Exceptional.unwrap
import scala.collection.{ mutable, immutable }
import scala.util.control.Exception.{ ultimately }
import SparkIMain._
import java.util.concurrent.Future
import typechecker.Analyzer
import scala.language.implicitConversions
import scala.reflect.runtime.{ universe => ru }
import scala.reflect.{ ClassTag, classTag }
import scala.tools.reflect.StdRuntimeTags._
import scala.util.control.ControlThrowable

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils
import org.apache.spark.annotation.DeveloperApi

// /** directory to save .class files to */
// private class ReplVirtualDirectory(out: JPrintWriter) extends VirtualDirectory("((memory))", None) {
//   private def pp(root: AbstractFile, indentLevel: Int) {
//     val spaces = "    " * indentLevel
//     out.println(spaces + root.name)
//     if (root.isDirectory)
//       root.toList sortBy (_.name) foreach (x => pp(x, indentLevel + 1))
//   }
//   // print the contents hierarchically
//   def show() = pp(this, 0)
// }

  /** An interpreter for Scala code.
   *
   *  The main public entry points are compile(), interpret(), and bind().
   *  The compile() method loads a complete Scala file.  The interpret() method
   *  executes one line of Scala code at the request of the user.  The bind()
   *  method binds an object to a variable that can then be used by later
   *  interpreted code.
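   *
   *  A typical interaction (illustrative sketch; the settings value, bound
   *  name, and interpreted snippet below are assumptions, not part of this source):
   *  {{{
   *     val intp = new SparkIMain(new Settings())
   *     intp.initializeSynchronous()               // block until the compiler is ready
   *     intp.bind("answer", "Int", 42)             // expose a host value to interpreted code
   *     intp.interpret("val doubled = answer * 2") // compile and run one line
   *  }}}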
   *
   *  The overall approach is based on compiling the requested code and then
   *  using a Java classloader and Java reflection to run the code
   *  and access its results.
   *
   *  In more detail, a single compiler instance is used
   *  to accumulate all successfully compiled or interpreted Scala code.  To
   *  "interpret" a line of code, the compiler generates a fresh object that
   *  includes the line of code and which has public member(s) to export
   *  all variables defined by that code.  To extract the result of an
   *  interpreted line to show the user, a second "result object" is created
   *  which imports the variables exported by the above object and then
   *  exports members called "$eval" and "$print". To accommodate user expressions
   *  that read from variables or methods defined in previous statements, "import"
   *  statements are used.
   *
   *  This interpreter shares the strengths and weaknesses of using the
   *  full compiler-to-Java.  The main strength is that interpreted code
   *  behaves exactly as does compiled code, including running at full speed.
   *  The main weakness is that redefining classes and methods is not handled
   *  properly, because rebinding at the Java level is technically difficult.
   *
   *  @author Moez A. Abdel-Gawad
   *  @author Lex Spoon
   */
  @DeveloperApi
  class SparkIMain(
      initialSettings: Settings,
      val out: JPrintWriter,
      propagateExceptions: Boolean = false)
    extends SparkImports with Logging { imain =>

    private val conf = new SparkConf()

    private val SPARK_DEBUG_REPL: Boolean = (System.getenv("SPARK_DEBUG_REPL") == "1")
    /** Local directory to save .class files to */
    private[repl] val outputDir = {
      val rootDir = conf.getOption("spark.repl.classdir").getOrElse(Utils.getLocalDir(conf))
      Utils.createTempDir(root = rootDir, namePrefix = "repl")
    }
    if (SPARK_DEBUG_REPL) {
      echo("Output directory: " + outputDir)
    }

    /**
     * Returns the path to the output directory containing all generated
     * class files that will be served by the REPL class server.
     */
    @DeveloperApi
    lazy val getClassOutputDirectory = outputDir

    private val virtualDirectory                      = new PlainFile(outputDir) // "directory" for classfiles
    private var currentSettings: Settings             = initialSettings
    private var printResults                          = true      // whether to print result lines
    private var totalSilence                          = false     // whether to print anything
    private var _initializeComplete                   = false     // compiler is initialized
    private var _isInitialized: Future[Boolean]       = null      // set up initialization future
    private var bindExceptions                        = true      // whether to bind the lastException variable
    private var _executionWrapper                     = ""        // code to be wrapped around all lines

    /** We're going to go to some trouble to initialize the compiler asynchronously.
     *  It's critical that nothing call into it until it's been initialized or we will
     *  run into unrecoverable issues, but the perceived repl startup time goes
     *  through the roof if we wait for it.  So we initialize it with a future and
     *  use a lazy val to ensure that any attempt to use the compiler object waits
     *  on the future.
     */
    private var _classLoader: AbstractFileClassLoader = null                              // active classloader
    private val _compiler: Global                     = newCompiler(settings, reporter)   // our private compiler

    private trait ExposeAddUrl extends URLClassLoader { def addNewUrl(url: URL) = this.addURL(url) }
    private var _runtimeClassLoader: URLClassLoader with ExposeAddUrl = null              // wrapper exposing addURL

    private val nextReqId = {
      var counter = 0
      () => { counter += 1 ; counter }
    }

    private def compilerClasspath: Seq[URL] = (
      if (isInitializeComplete) global.classPath.asURLs
      else new PathResolver(settings).result.asURLs  // the compiler's classpath
      )
    // NOTE: Exposed to repl package since accessed indirectly from SparkIMain
    private[repl] def settings = currentSettings
    private def mostRecentLine = prevRequestList match {
      case Nil      => ""
      case req :: _ => req.originalLine
    }
    // Run the code body with the given boolean settings flipped to true.
    private def withoutWarnings[T](body: => T): T = beQuietDuring {
      val saved = settings.nowarn.value
      if (!saved)
        settings.nowarn.value = true

      try body
      finally if (!saved) settings.nowarn.value = false
    }

    /** construct an interpreter that reports to Console */
    def this(settings: Settings) = this(settings, new NewLinePrintWriter(new ConsoleWriter, true))
    def this() = this(new Settings())

    private lazy val repllog: Logger = new Logger {
      val out: JPrintWriter = imain.out
      val isInfo: Boolean  = BooleanProp keyExists "scala.repl.info"
      val isDebug: Boolean = BooleanProp keyExists "scala.repl.debug"
      val isTrace: Boolean = BooleanProp keyExists "scala.repl.trace"
    }
    private[repl] lazy val formatting: Formatting = new Formatting {
      val prompt = Properties.shellPromptString
    }

    // NOTE: Exposed to repl package since used by SparkExprTyper and SparkILoop
    private[repl] lazy val reporter: ConsoleReporter = new SparkIMain.ReplReporter(this)

    /**
     * Determines if errors were reported (typically during compilation).
     *
     * @note This is not for runtime errors
     *
     * @return True if had errors, otherwise false
     */
    @DeveloperApi
    def isReportingErrors = reporter.hasErrors

    import formatting._
    import reporter.{ printMessage, withoutTruncating }

    // This exists mostly because using the reporter too early leads to deadlock.
    private def echo(msg: String) { Console println msg }
    private def _initSources = List(new BatchSourceFile("<init>", "class $repl_$init { }"))
    private def _initialize() = {
      try {
        // todo. if this crashes, REPL will hang
        new _compiler.Run() compileSources _initSources
        _initializeComplete = true
        true
      }
      catch AbstractOrMissingHandler()
    }
    private def tquoted(s: String) = "\"\"\"" + s + "\"\"\""

    // argument is a thunk to execute after init is done
    // NOTE: Exposed to repl package since used by SparkILoop
    private[repl] def initialize(postInitSignal: => Unit) {
      synchronized {
        if (_isInitialized == null) {
          _isInitialized = io.spawn {
            try _initialize()
            finally postInitSignal
          }
        }
      }
    }

    /**
     * Initializes the underlying compiler/interpreter in a blocking fashion.
     *
     * @note Must be executed before using SparkIMain!
     */
    @DeveloperApi
    def initializeSynchronous(): Unit = {
      if (!isInitializeComplete) {
        _initialize()
        assert(global != null, global)
      }
    }
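
    // Illustrative sketch (the value name and callback body are assumptions): callers either
    // block on initialization or, from within the repl package, supply a callback to run
    // once the compiler is ready.
    //
    //   val intp = new SparkIMain(new Settings())
    //   intp.initializeSynchronous()                      // blocking variant
    //   // intp.initialize { println("compiler ready") }  // async variant (private[repl])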
    private def isInitializeComplete = _initializeComplete

    /**
     * The underlying compiler used to generate ASTs and execute code.
     * This is the public accessor: it goes through the future-based
     * initialization and only hands out the compiler once it is ready.
     */
    @DeveloperApi
    lazy val global: Global = {
      if (isInitializeComplete) _compiler
      else {
        // If init hasn't been called yet you're on your own.
        if (_isInitialized == null) {
          logWarning("Warning: compiler accessed before init set up.  Assuming no postInit code.")
          initialize(())
        }
        // Blocks until initialization completes; false means catastrophic failure.
        if (_isInitialized.get()) _compiler
        else null
      }
    }
    @deprecated("Use `global` for access to the compiler instance.", "2.9.0")
    private lazy val compiler: global.type = global

    import global._
    import definitions.{ScalaPackage, JavaLangPackage, termMember, typeMember}
    import rootMirror.{RootClass, getClassIfDefined, getModuleIfDefined, getRequiredModule, getRequiredClass}

    private implicit class ReplTypeOps(tp: Type) {
      def orElse(other: => Type): Type    = if (tp ne NoType) tp else other
      def andAlso(fn: Type => Type): Type = if (tp eq NoType) tp else fn(tp)
    }

    // TODO: If we try to make naming a lazy val, we run into big time
    // scalac unhappiness with what look like cycles.  It has not been easy to
    // reduce, but name resolution clearly takes different paths.
    // NOTE: Exposed to repl package since used by SparkExprTyper
    private[repl] object naming extends {
      val global: imain.global.type = imain.global
    } with Naming {
      // make sure we don't overwrite their unwisely named res3 etc.
      def freshUserTermName(): TermName = {
        val name = newTermName(freshUserVarName())
        if (definedNameMap contains name) freshUserTermName()
        else name
      }
      def isUserTermName(name: Name) = isUserVarName("" + name)
      def isInternalTermName(name: Name) = isInternalVarName("" + name)
    }
    import naming._

    // NOTE: Exposed to repl package since used by SparkILoop
    private[repl] object deconstruct extends {
      val global: imain.global.type = imain.global
    } with StructuredTypeStrings

    // NOTE: Exposed to repl package since used by SparkImports
    private[repl] lazy val memberHandlers = new {
      val intp: imain.type = imain
    } with SparkMemberHandlers
    import memberHandlers._

    /**
     * Suppresses overwriting print results during the operation.
     *
     * @param body The block to execute
     * @tparam T The return type of the block
     *
     * @return The result from executing the block
     */
    @DeveloperApi
    def beQuietDuring[T](body: => T): T = {
      val saved = printResults
      printResults = false
      try body
      finally printResults = saved
    }

    /**
     * Completely masks all output during the operation (minus JVM standard
     * out and error).
     *
     * @param operation The block to execute
     * @tparam T The return type of the block
     *
     * @return The result from executing the block
     */
    @DeveloperApi
    def beSilentDuring[T](operation: => T): T = {
      val saved = totalSilence
      totalSilence = true
      try operation
      finally totalSilence = saved
    }
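
    // Illustrative sketch (assumption, not part of the original source) of the difference
    // between the two suppression helpers:
    //
    //   intp.beQuietDuring  { intp.interpret("val a = 1") }  // runs, but the "a: Int = 1" result line is not printed
    //   intp.beSilentDuring { intp.interpret("val b = 2") }  // additionally masks other interpreter output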

    // NOTE: Exposed to repl package since used by SparkILoop
    private[repl] def quietRun[T](code: String) = beQuietDuring(interpret(code))

    private def logAndDiscard[T](label: String, alt: => T): PartialFunction[Throwable, T] = {
      case t: ControlThrowable => throw t
      case t: Throwable        =>
        logDebug(label + ": " + unwrap(t))
        logDebug(stackTraceString(unwrap(t)))
        alt
    }
    /** takes AnyRef because it may be binding a Throwable or an Exceptional */

    private def withLastExceptionLock[T](body: => T, alt: => T): T = {
      assert(bindExceptions, "withLastExceptionLock called incorrectly.")
      bindExceptions = false

      try     beQuietDuring(body)
      catch   logAndDiscard("withLastExceptionLock", alt)
      finally bindExceptions = true
    }

    /**
     * Contains the code (in string form) representing a wrapper around all
     * code executed by this instance.
     *
     * @return The wrapper code as a string
     */
    @DeveloperApi
    def executionWrapper = _executionWrapper

    /**
     * Sets the code to use as a wrapper around all code executed by this
     * instance.
     *
     * @param code The wrapper code as a string
     */
    @DeveloperApi
    def setExecutionWrapper(code: String) = _executionWrapper = code

    /**
     * Clears the code used as a wrapper around all code executed by
     * this instance.
     */
    @DeveloperApi
    def clearExecutionWrapper() = _executionWrapper = ""

    /** interpreter settings */
    private lazy val isettings = new SparkISettings(this)

    /**
     * Instantiates a new compiler used by SparkIMain. Overridable to provide
     * own instance of a compiler.
     *
     * @param settings The settings to provide the compiler
     * @param reporter The reporter to use for compiler output
     *
     * @return The compiler as a Global
     */
    @DeveloperApi
    protected def newCompiler(settings: Settings, reporter: Reporter): ReplGlobal = {
      settings.outputDirs setSingleOutput virtualDirectory
      settings.exposeEmptyPackage.value = true
      new Global(settings, reporter) with ReplGlobal {
        override def toString: String = "<global>"
      }
    }
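
    // Illustrative sketch (assumption, not part of the original source): a subclass could
    // override newCompiler to adjust settings before the Global is constructed.
    //
    //   class VerboseSparkIMain(s: Settings, w: JPrintWriter) extends SparkIMain(s, w) {
    //     override protected def newCompiler(settings: Settings, reporter: Reporter): ReplGlobal = {
    //       settings.verbose.value = true   // e.g. ask the compiler for verbose output
    //       super.newCompiler(settings, reporter)
    //     }
    //   }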

    /**
     * Adds any specified jars to the compile and runtime classpaths.
     *
     * @note Currently only supports jars, not directories
     * @param urls The list of items to add to the compile and runtime classpaths
     */
    @DeveloperApi
    def addUrlsToClassPath(urls: URL*): Unit = {
      new Run // Needed to force initialization of "something" to correctly load Scala classes from jars
      urls.foreach(_runtimeClassLoader.addNewUrl) // Add jars/classes to runtime for execution
      updateCompilerClassPath(urls: _*)           // Add jars/classes to compile time for compiling
    }
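
    // Illustrative sketch (the jar path is an assumption): making an extra jar visible both
    // to the runtime classloader and to the compiler after the interpreter has started.
    //
    //   intp.addUrlsToClassPath(new java.io.File("/path/to/extra.jar").toURI.toURL)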

    private def updateCompilerClassPath(urls: URL*): Unit = {
      require(!global.forMSIL) // Only support JavaPlatform

      val platform = global.platform.asInstanceOf[JavaPlatform]

      val newClassPath = mergeUrlsIntoClassPath(platform, urls: _*)

      // NOTE: Must use reflection until this is exposed/fixed upstream in Scala
      val fieldSetter = platform.getClass.getMethods
        .find(_.getName.endsWith("currentClassPath_$eq")).get
      fieldSetter.invoke(platform, Some(newClassPath))

      // Reload all jars specified into our compiler
      global.invalidateClassPathEntries(urls.map(_.getPath): _*)
    }

    private def mergeUrlsIntoClassPath(platform: JavaPlatform, urls: URL*): MergedClassPath[AbstractFile] = {
      // Collect our new jars/directories and add them to the existing set of classpaths
      val allClassPaths = (
        platform.classPath.asInstanceOf[MergedClassPath[AbstractFile]].entries ++
        urls.map(url => {
          platform.classPath.context.newClassPath(
            if (url.getProtocol == "file") {
              val f = new File(url.getPath)
              if (f.isDirectory)
                io.AbstractFile.getDirectory(f)
              else
                io.AbstractFile.getFile(f)
            } else {
              io.AbstractFile.getURL(url)
            }
          )
        })
      ).distinct

      // Combine all of our classpaths (old and new) into one merged classpath
      new MergedClassPath(allClassPaths, platform.classPath.context)
    }

    /**
     * Represents the parent classloader used by this instance. Can be
     * overridden to provide alternative classloader.
     *
     * @return The classloader used as the parent loader of this instance
     */
    @DeveloperApi
    protected def parentClassLoader: ClassLoader =
      SparkHelper.explicitParentLoader(settings).getOrElse( this.getClass.getClassLoader() )

    /* A single class loader is used for all commands interpreted by this Interpreter.
     It would also be possible to create a new class loader for each command
     to interpret.  The advantages of the current approach are:

    - Expressions are only evaluated one time.  This is especially
    significant for I/O, e.g. "val x = Console.readLine"

    The main disadvantage is:

    - Objects, classes, and methods cannot be rebound.  Instead, definitions
    shadow the old ones, and old code objects refer to the old
    definitions.
    */
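    // Illustrative sketch (assumption, not part of the original source) of the shadowing
    // behaviour described above, as seen from interpreted lines:
    //
    //   intp.interpret("val x = 1")
    //   intp.interpret("def f = x")   // f captures the binding of x from the previous line
    //   intp.interpret("val x = 2")   // shadows x for later lines; f still returns 1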
    private def resetClassLoader() = {
      logDebug("Setting new classloader: was " + _classLoader)
      _classLoader = null
      ensureClassLoader()
    }
    private final def ensureClassLoader() {
      if (_classLoader == null)
        _classLoader = makeClassLoader()
    }

    // NOTE: Exposed to repl package since used by SparkILoop
    private[repl] def classLoader: AbstractFileClassLoader = {
      ensureClassLoader()
      _classLoader
    }
    private class TranslatingClassLoader(parent: ClassLoader) extends AbstractFileClassLoader(virtualDirectory, parent) {
      /** Overridden here to try translating a simple name to the generated
       *  class name if the original attempt fails.  This method is used by
       *  getResourceAsStream as well as findClass.
       */
      override protected def findAbstractFile(name: String): AbstractFile = {
        super.findAbstractFile(name) match {
          // deadlocks on startup if we try to translate names too early
          case null if isInitializeComplete =>
            generatedName(name) map (x => super.findAbstractFile(x)) orNull
          case file                         =>
            file
        }
      }
    }
    private def makeClassLoader(): AbstractFileClassLoader =
      new TranslatingClassLoader(parentClassLoader match {
        case null   => ScalaClassLoader fromURLs compilerClasspath
        case p      =>
          _runtimeClassLoader = new URLClassLoader(compilerClasspath, p) with ExposeAddUrl
          _runtimeClassLoader
      })

    private def getInterpreterClassLoader() = classLoader

    // Set the current Java "context" class loader to this interpreter's class loader
    // NOTE: Exposed to repl package since used by SparkILoopInit
    private[repl] def setContextClassLoader() = classLoader.setAsContext()

    /**
     * Returns the real name of a class based on its repl-defined name.
     *
     * ==Example==
     * Given a simple repl-defined name, returns the real name of
     * the class representing it, e.g. for "Bippy" it may return
     * {{{
     *     $line19.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$Bippy
     * }}}
     *
     * @param simpleName The repl-defined name whose real name to retrieve
     *
     * @return Some real name if the simple name exists, else None
     */
    @DeveloperApi
    def generatedName(simpleName: String): Option[String] = {
      if (simpleName endsWith nme.MODULE_SUFFIX_STRING) optFlatName(simpleName.init) map (_ + nme.MODULE_SUFFIX_STRING)
      else optFlatName(simpleName)
    }

    // NOTE: Exposed to repl package since used by SparkILoop
    private[repl] def flatName(id: String)    = optFlatName(id) getOrElse id
    // NOTE: Exposed to repl package since used by SparkILoop
    private[repl] def optFlatName(id: String) = requestForIdent(id) map (_ fullFlatName id)

    /**
     * Retrieves all simple names contained in the current instance.
     *
     * @return A list of sorted names
     */
    @DeveloperApi
    def allDefinedNames = definedNameMap.keys.toList.sorted

    private def pathToType(id: String): String = pathToName(newTypeName(id))
    // NOTE: Exposed to repl package since used by SparkILoop
    private[repl] def pathToTerm(id: String): String = pathToName(newTermName(id))

    /**
     * Retrieves the full code path to access the specified simple name
     * content.
     *
     * @param name The simple name of the target whose path to determine
     *
     * @return The full path used to access the specified target (name)
     */
    @DeveloperApi
    def pathToName(name: Name): String = {
      if (definedNameMap contains name)
        definedNameMap(name) fullPath name
      else name.toString
    }

    /** Most recent tree handled which wasn't wholly synthetic. */
    private def mostRecentlyHandledTree: Option[Tree] = {
      prevRequests.reverse foreach { req =>
        req.handlers.reverse foreach {
          case x: MemberDefHandler if x.definesValue && !isInternalTermName(x.name) => return Some(x.member)
          case _ => ()
        }
      }
      None
    }

    /** Stubs for work in progress. */
    private def handleTypeRedefinition(name: TypeName, old: Request, req: Request) = {
      for (t1 <- old.simpleNameOfType(name) ; t2 <- req.simpleNameOfType(name)) {
        logDebug("Redefining type '%s'\n  %s -> %s".format(name, t1, t2))
      }
    }

    private def handleTermRedefinition(name: TermName, old: Request, req: Request) = {
      for (t1 <- old.compilerTypeOf get name ; t2 <- req.compilerTypeOf get name) {
        // Printing the types here has a tendency to cause assertion errors, like
        //   assertion failed: fatal: <none> has owner value x, but a class owner is required
        // so DBG is by-name now to keep it in the family.  (It also traps the assertion error,
        // but we don't want to unnecessarily risk hosing the compiler's internal state.)
        logDebug("Redefining term '%s'\n  %s -> %s".format(name, t1, t2))
      }
    }

    private def recordRequest(req: Request) {
      if (req == null || referencedNameMap == null)
        return

      prevRequests += req
      req.referencedNames foreach (x => referencedNameMap(x) = req)

      // warning about serially defining companions.  It'd be easy
      // enough to just redefine them together but that may not always
      // be what people want so I'm waiting until I can do it better.
      for {
        name   <- req.definedNames filterNot (x => req.definedNames contains x.companionName)
        oldReq <- definedNameMap get name.companionName
        newSym <- req.definedSymbols get name
        oldSym <- oldReq.definedSymbols get name.companionName
        if Seq(oldSym, newSym).permutations exists { case Seq(s1, s2) => s1.isClass && s2.isModule }
      } {
        afterTyper(replwarn(s"warning: previously defined $oldSym is not a companion to $newSym."))
        replwarn("Companions must be defined together; you may wish to use :paste mode for this.")
      }
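
      // Illustrative sketch (assumption): the scenario this warning guards against is
      // defining companions on separate interpreted lines, e.g.
      //   intp.interpret("class Bippy(x: Int)")
      //   intp.interpret("object Bippy")   // warns: previously defined class Bippy is not a companion
      // Pasting both definitions together (e.g. via :paste) keeps them companions.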

      // Updating the defined name map
      req.definedNames foreach { name =>
        if (definedNameMap contains name) {
          if (name.isTypeName) handleTypeRedefinition(name.toTypeName, definedNameMap(name), req)
          else handleTermRedefinition(name.toTermName, definedNameMap(name), req)
        }
        definedNameMap(name) = req
      }
    }

    private def replwarn(msg: => String) {
      if (!settings.nowarnings.value)
        printMessage(msg)
    }

    private def isParseable(line: String): Boolean = {
      beSilentDuring {
        try parse(line) match {
          case Some(xs) => xs.nonEmpty  // parses as-is
          case None     => true         // incomplete
        }
        catch { case x: Exception =>    // crashed the compiler
          replwarn("Exception in isParseable(\"" + line + "\"): " + x)
          false
        }
      }
    }

    private def compileSourcesKeepingRun(sources: SourceFile*) = {
      val run = new Run()
      reporter.reset()
      run compileSources sources.toList
      (!reporter.hasErrors, run)
    }

    /**
     * Compiles specified source files.
     *
     * @param sources The sequence of source files to compile
     *
     * @return True if successful, otherwise false
     */
    @DeveloperApi
    def compileSources(sources: SourceFile*): Boolean =
      compileSourcesKeepingRun(sources: _*)._1
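
    // Illustrative sketch (the source name and snippet are assumptions): compiling an
    // in-memory source without interpreting it.
    //
    //   intp.compileSources(new BatchSourceFile("<example>", "object CompiledOnly { val n = 1 }"))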

    /**
     * Compiles a string of code.
     *
     * @param code The string of code to compile
     *
     * @return True if successful, otherwise false
     */
    @DeveloperApi
    def compileString(code: String): Boolean =
      compileSources(new BatchSourceFile("