All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.databricks.spark.redshift.AWSCredentialsUtils.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2015 Databricks
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.databricks.spark.redshift

import java.net.URI

import com.amazonaws.auth.{BasicAWSCredentials, AWSCredentials, AWSSessionCredentials, InstanceProfileCredentialsProvider}
import org.apache.hadoop.conf.Configuration

import com.databricks.spark.redshift.Parameters.MergedParameters

private[redshift] object AWSCredentialsUtils {

  /**
    * Generates a credentials string for use in Redshift COPY and UNLOAD statements.
    * Favors a configured `aws_iam_role` if available in the parameters.
    */
  def getRedshiftCredentialsString(params: MergedParameters,
                                   awsCredentials: AWSCredentials): String = {
    params.iamRole
      .map { role => s"aws_iam_role=$role" }
      .getOrElse(
        awsCredentials match {
          case creds: AWSSessionCredentials =>
            s"aws_access_key_id=${creds.getAWSAccessKeyId};" +
              s"aws_secret_access_key=${creds.getAWSSecretKey};token=${creds.getSessionToken}"
          case creds =>
            s"aws_access_key_id=${creds.getAWSAccessKeyId};" +
              s"aws_secret_access_key=${creds.getAWSSecretKey}"
        })
  }

  def load(params: MergedParameters, hadoopConfiguration: Configuration): AWSCredentials = {
    params.temporaryAWSCredentials.getOrElse(loadFromURI(params.rootTempDir, hadoopConfiguration))
  }

  private def loadFromURI(tempPath: String, hadoopConfiguration: Configuration): AWSCredentials = {
    // scalastyle:off
    // A good reference on Hadoop's configuration loading / precedence is
    // https://github.com/apache/hadoop/blob/trunk/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
    // scalastyle:on
    val uri = new URI(tempPath)
    uri.getScheme match {
      case "s3" | "s3n" =>
        val creds = new S3Credentials()
        creds.initialize(uri, hadoopConfiguration)
        new BasicAWSCredentials(creds.getAccessKey, creds.getSecretAccessKey)
      case "s3a" =>
        // This matches what S3A does, with one exception: we don't support anonymous credentials.
        // First, try to parse from URI:
        Option(uri.getUserInfo).flatMap { userInfo =>
          if (userInfo.contains(":")) {
            val Array(accessKey, secretKey) = userInfo.split(":")
            Some(new BasicAWSCredentials(accessKey, secretKey))
          } else {
            None
          }
        }.orElse {
          // Next, try to read from configuration
          val accessKey = hadoopConfiguration.get("fs.s3a.access.key", null)
          val secretKey = hadoopConfiguration.get("fs.s3a.secret.key", null)
          if (accessKey != null && secretKey != null) {
            Some(new BasicAWSCredentials(accessKey, secretKey))
          } else {
            None
          }
        }.getOrElse {
          // Finally, fall back on the instance profile provider
         new InstanceProfileCredentialsProvider().getCredentials
        }
      case other =>
        throw new IllegalArgumentException(s"Unrecognized scheme $other; expected s3, s3n, or s3a")
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy