All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hindog.grid.hadoop.HadoopEnvironment.scala Maven / Gradle / Ivy

package com.hindog.grid.hadoop

import com.hindog.grid.ClasspathUtils
import org.apache.commons.io._
import org.apache.commons.io.filefilter.SuffixFileFilter
import org.apache.hadoop.conf.Configuration

import java.io._
import java.net.{URL, URLClassLoader}
import java.util.concurrent.TimeUnit

/*
 *    __   _         __         
 *   / /  (_)__  ___/ /__  ____
 *  / _ \/ / _ \/ _  / _ \/ _  /
 * /_//_/_/_//_/\_,_/\___/\_, / 
 *                       /___/
 */
object HadoopEnvironment {

  private val defaultFiles = Array("core-site.xml", "hdfs-site.xml", "yarn-site.xml", "mapred-site.xml")

  /**
    * Load Hadoop configuration using all files ending with ".xml" in `HADOOP_CONF_DIR`
    */
  def loadConfiguration(loadDefaults: Boolean): Configuration = loadConfiguration(loadDefaults, Array.empty)

  /**
    * Load Hadoop configuration using one of two methods:
    *
    * 1) If HADOOP_CONF_DIR is set, load all (or specified) resource files from that folder.
    *
    * -- otherwise --
    * 
    * 2) Use Hadoop classpath obtained from `hadoop classpath` command.  By default, we'll
    * load `core-site.xml`, `hdfs-site.xml`, `mapred-site.xml` and `yarn-site.xml` files we find on the classpath.
    */
  def loadConfiguration(loadDefaults: Boolean = true, files: Array[String] = Array.empty): Configuration = {
    import scala.collection.JavaConverters._
    Option(System.getenv("HADOOP_CONF_DIR")).map(dir => new File(dir)) match {
      case Some(confDir) => {
        val conf = new Configuration(loadDefaults)
        if (files.isEmpty) {
          FileUtils.listFiles(confDir, Array(".xml"), false).asScala.foreach(f => conf.addResource(f.toURI.toURL))
        } else {
          files.foreach(f => conf.addResource(new File(confDir, f).toURI.toURL))
        }
        conf
      }
      case None => {
        val confFiles = if (files.isEmpty) defaultFiles else files
        val hadoopClassloader = new URLClassLoader(classpath)
        val conf = new Configuration(loadDefaults)
        conf.setClassLoader(hadoopClassloader)
        confFiles.foreach(conf.addResource)
        conf
      }
    }
  }

  /**
    * Fairly portable method for getting the Hadoop classpath in the current environment.
    * It assumes that the 'hadoop' command is available and calls 'hadoop classpath', parses
    * the result, and returns a resolved list of URL's (including wildcard/glob-style patterns,
    * see `com.hindog.grid.ClasspathUtils` for more info).
    */
  def classpath: Array[URL] = {
    val cpProcess = new ProcessBuilder("hadoop", "classpath").start()
    val stringWriter = new StringWriter()

    cpProcess.waitFor(10, TimeUnit.SECONDS)
    IOUtils.copy(cpProcess.getInputStream, stringWriter)

    ClasspathUtils.explodeClasspath(stringWriter.toString)
  }

  def withHadoopClassloader[T](thunk: => T): T = {
    val parent = Thread.currentThread().getContextClassLoader
    try {
      Thread.currentThread().setContextClassLoader(new URLClassLoader(classpath))
      thunk
    } finally {
      Thread.currentThread().setContextClassLoader(parent)
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy