All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.spark.deploy.yarn.security.EsServiceCredentialProvider.scala Maven / Gradle / Ivy

There is a newer version: 8.17.0
Show newest version
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.spark.deploy.yarn.security

import java.security.PrivilegedExceptionAction
import java.util
import java.util.UUID

import org.apache.commons.logging.LogFactory
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.Credentials
import org.apache.hadoop.security.UserGroupInformation
import org.apache.spark.SparkConf
import org.apache.spark.deploy.yarn.security.ServiceCredentialProvider
import org.elasticsearch.hadoop.cfg.CompositeSettings
import org.elasticsearch.hadoop.cfg.HadoopSettingsManager
import org.elasticsearch.hadoop.mr.security.EsTokenIdentifier
import org.elasticsearch.hadoop.mr.security.HadoopUserProvider
import org.elasticsearch.hadoop.mr.security.TokenUtil
import org.elasticsearch.hadoop.rest.InitializationUtils
import org.elasticsearch.hadoop.rest.RestClient
import org.elasticsearch.hadoop.security.AuthenticationMethod
import org.elasticsearch.hadoop.security.EsToken
import org.elasticsearch.hadoop.security.UserProvider
import org.elasticsearch.spark.cfg.SparkSettingsManager

/**
 * A provider interface in Spark's Yarn library that obtains tokens for an application.
 *
 * When a job is submitted to a YARN cluster, the credential providers are constructed
 * using a service loader. Each provider is queried for if its service requires a token,
 * and if so, Spark requests that it obtains one.
 *
 * In client deployment mode, these tokens are retrieved on the driver when the YARN
 * application is first initialized and started by the main program.
 *
 * In cluster deployment mode, these tokens are retrieved initially on the driver before
 * submitting the application master that runs the main program.
 *
 * If a principal and keytab are provided to a job, a credentials file is created, and
 * a background thread is started on the application master that will obtain new tokens
 * when they get close to expiring. Those tokens are written to an HDFS directory which
 * the worker nodes will regularly poll to get updated tokens. If the job is launched
 * in client mode, the client will also receive updated tokens.
 */
class EsServiceCredentialProvider extends ServiceCredentialProvider {

  private[this] val LOG = LogFactory.getLog(classOf[EsServiceCredentialProvider])

  LOG.info("Loaded EsServiceCredentialProvider")

  /**
   * Name of the service for logging purposes and for the purpose of determining if the
   * service is disabled in the settings using the property
   * spark.security.credentials.[serviceName].enabled
   * @return the service name this provider corresponds to
   */
  override def serviceName: String = "elasticsearch"

  /**
    *  Given a configuration, check to see if tokens would be required.
    *
    * @param hadoopConf the current Hadoop configuration
    * @return true if tokens should be gathered, false if they should not be
    */
  override def credentialsRequired(hadoopConf: Configuration): Boolean = {
    credentialsRequired(null, hadoopConf)
  }

  /**
    *  Given a configuration, check to see if tokens would be required.
    *
    * @param sparkConf the current Spark configuration - used by Cloudera's CDS Spark fork (#1301)
    * @param hadoopConf the current Hadoop configuration
    * @return true if tokens should be gathered, false if they should not be
    */
  def credentialsRequired(sparkConf: SparkConf, hadoopConf: Configuration): Boolean = {
    val settings = if (sparkConf != null) {
      new CompositeSettings(util.Arrays.asList(
        new SparkSettingsManager().load(sparkConf),
        new HadoopSettingsManager().load(hadoopConf)
      ))
    } else {
      HadoopSettingsManager.loadFrom(hadoopConf)
    }
    val isSecurityEnabled = UserGroupInformation.isSecurityEnabled
    val esAuthMethod = settings.getSecurityAuthenticationMethod
    val required = isSecurityEnabled && AuthenticationMethod.KERBEROS.equals(esAuthMethod)
    LOG.info(s"Hadoop Security Enabled = [$isSecurityEnabled]")
    LOG.info(s"ES Auth Method = [$esAuthMethod]")
    LOG.info(s"Are creds required = [$required]")
    required
  }

  /**
   * Obtains api key tokens from Elasticsearch and stashes them in the given credentials object
   * @param hadoopConf Hadoop configuration, picking up all Hadoop specific settings
   * @param sparkConf All settings that exist in Spark
   * @param creds The credentials object that will be shared between all workers
   * @return The expiration time for the token
   */
  override def obtainCredentials(hadoopConf: Configuration, sparkConf: SparkConf, creds: Credentials): Option[Long] = {
    val settings = new CompositeSettings(util.Arrays.asList(
      new SparkSettingsManager().load(sparkConf),
      new HadoopSettingsManager().load(hadoopConf)
    ))
    InitializationUtils.setUserProviderIfNotSet(settings, classOf[HadoopUserProvider],
      LogFactory.getLog(classOf[EsServiceCredentialProvider]))
    val userProvider = UserProvider.create(settings)
    val client = new RestClient(settings)
    try {
      val user = userProvider.getUser
      val esToken = user.doAs(new PrivilegedExceptionAction[EsToken]() {
        override def run: EsToken = client.createNewApiToken(TokenUtil.KEY_NAME_PREFIX + UUID.randomUUID().toString)
      })
      if (LOG.isInfoEnabled) {
        LOG.info(s"getting token for: Elasticsearch[tokenName=${esToken.getName}, " +
          s"clusterName=${esToken.getClusterName}, user=${user}]")
      }
      val expiration = esToken.getExpirationTime
      val token = EsTokenIdentifier.createTokenFrom(esToken)
      creds.addToken(token.getService, token)
      Some(expiration)
    } finally {
      client.close()
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy