All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.spotify.scio.parquet.GcsConnectorUtil.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2019 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.scio.parquet

import com.google.auth.oauth2.{GoogleCredentials, ServiceAccountCredentials}
import com.google.cloud.hadoop.util.AccessTokenProvider
import com.spotify.scio.ScioContext
import com.spotify.scio.util.ScioUtil
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat

import java.io.File
import java.util.Locale
import scala.util.{Success, Try}

/**
 * Helpers for temporarily wiring GCP credentials into a Hadoop [[Job]] configuration so that
 * `gs://` input paths can be validated on the machine that builds the pipeline.
 */
private[parquet] object GcsConnectorUtil {

  // Hadoop configuration keys written by `setCredentials` and removed by `unsetCredentials`.
  // Keeping them in one place guarantees both methods always touch the same set of keys.
  private val JsonKeyFileKey = "fs.gs.auth.service.account.json.keyfile"
  private val TokenProviderImplKey = "fs.gs.auth.access.token.provider.impl"
  private val ServiceAccountEnableKey = "fs.gs.auth.service.account.enable"
  private val NullAuthEnableKey = "fs.gs.auth.null.enable"

  // Fully-qualified class name registered under `TokenProviderImplKey`.
  private val TokenProviderClassName = "com.spotify.scio.parquet.ApplicationDefaultTokenProvider"

  /**
   * Attempts to set Hadoop credential configuration when running locally. This is needed since
   * [[FileInputFormat.setInputPaths]] validates paths locally and requires the user's GCP
   * credentials.
   *
   * The application default credentials are resolved first, then exactly one of the following is
   * applied:
   *
   *   - The credentials are service account credentials and a key file is found, in order of
   *     precedence, (1) at the path in the GOOGLE_APPLICATION_CREDENTIALS environment variable
   *     or (2) at the gcloud `application_default_credentials.json` well-known location: the
   *     file is registered as `fs.gs.auth.service.account.json.keyfile`.
   *   - The credentials are of any other kind, or no key file is found:
   *     `fs.gs.auth.access.token.provider.impl` is pointed at the application-default token
   *     provider.
   *   - The credentials cannot be resolved at all (or looking up the key file fails with a
   *     non-fatal exception): service account auth is disabled and `fs.gs.auth.null.enable` is
   *     set (to enable unit testing).
   *
   * @param job
   *   the job whose configuration is modified in place
   */
  def setCredentials(job: Job): Unit = {
    val conf = job.getConfiguration
    Try(GoogleCredentials.getApplicationDefault()).map {
      case _: ServiceAccountCredentials => getWellKnownCredentialFile.map(_.toString)
      case _                            => None
    } match {
      case Success(Some(keyFile)) =>
        conf.set(JsonKeyFileKey, keyFile)
      case Success(None) =>
        conf.set(TokenProviderImplKey, TokenProviderClassName)
      case _ =>
        // `Try` only captures non-fatal exceptions, so this branch is the best-effort fallback
        // for "no usable credentials" and not a catch-all for fatal errors.
        conf.setBoolean(ServiceAccountEnableKey, false)
        conf.setBoolean(NullAuthEnableKey, true)
    }
  }

  /**
   * Removes every configuration key that [[setCredentials]] may have written.
   *
   * @param job
   *   the job whose configuration is modified in place
   */
  def unsetCredentials(job: Job): Unit = {
    val conf = job.getConfiguration
    conf.unset(JsonKeyFileKey)
    conf.unset(TokenProviderImplKey)
    conf.unset(NullAuthEnableKey)
    conf.unset(ServiceAccountEnableKey)
  }

  /**
   * Sets `path` as the input path of `job`, providing local credentials for the duration of the
   * call.
   *
   * @param sc
   *   context whose runner decides whether the credentials are kept afterwards
   * @param job
   *   the job to configure
   * @param path
   *   input path(s), passed through to [[FileInputFormat.setInputPaths]] unchanged
   */
  def setInputPaths(sc: ScioContext, job: Job, path: String): Unit = {
    // This is needed since `FileInputFormat.setInputPaths` validates paths locally and requires
    // the user's GCP credentials.
    GcsConnectorUtil.setCredentials(job)

    try {
      FileInputFormat.setInputPaths(job, path)
    } finally {
      // It will interfere with credentials in Dataflow workers. Done in `finally` so the
      // temporary settings do not linger on the job when path validation throws.
      if (!ScioUtil.isLocalRunner(sc.options.getRunner)) {
        GcsConnectorUtil.unsetCredentials(job)
      }
    }
  }

  /**
   * Locates an existing credential file: first the path named by the
   * GOOGLE_APPLICATION_CREDENTIALS environment variable, then
   * `gcloud/application_default_credentials.json` under `%APPDATA%` (when `os.name` contains
   * "windows") or under `.config` in the `user.home` directory (otherwise).
   *
   * Adapted from com.google.auth.oauth2.DefaultCredentialsProvider
   *
   * @return
   *   the first candidate that exists on disk, or `None`
   */
  private def getWellKnownCredentialFile: Option[File] = {
    val fromEnv = sys.env
      .get("GOOGLE_APPLICATION_CREDENTIALS")
      .map(new File(_))
      .filter(_.exists())

    fromEnv.orElse {
      val os = sys.props.getOrElse("os.name", "").toLowerCase(Locale.US)
      // A missing APPDATA variable means "no well-known file" rather than an exception, which
      // the caller's `Try` would otherwise turn into the null-auth fallback.
      val cloudRootPath: Option[File] =
        if (os.contains("windows")) {
          sys.env.get("APPDATA").map(new File(_))
        } else {
          Some(new File(sys.props.getOrElse("user.home", ""), ".config"))
        }
      cloudRootPath
        .map(root => new File(root, "gcloud/application_default_credentials.json"))
        .filter(_.exists())
    }
  }
}

/**
 * [[AccessTokenProvider]] backed by the Google application default credentials.
 *
 * `GcsConnectorUtil.setCredentials` registers this class by name under
 * `fs.gs.auth.access.token.provider.impl`; presumably the GCS connector instantiates it
 * reflectively, hence the no-argument constructor (confirm against the connector documentation).
 */
class ApplicationDefaultTokenProvider() extends AccessTokenProvider {
  // Resolved on first use, so constructing the provider never fails on missing credentials.
  private lazy val adc = GoogleCredentials.getApplicationDefault()
  private var conf: Option[Configuration] = None

  /**
   * Returns the current access token, refreshing the credentials first when no token has been
   * fetched yet.
   *
   * @return
   *   the token value together with its expiration time in epoch milliseconds, or a `null`
   *   expiration when the underlying token does not carry one
   */
  override def getAccessToken: AccessTokenProvider.AccessToken = {
    val gToken = Option(adc.getAccessToken).getOrElse { adc.refresh(); adc.getAccessToken }
    // The expiration time of a google-auth access token may be absent; pass that through as a
    // null boxed Long instead of dereferencing it and failing with a NullPointerException.
    val expirationMillis: java.lang.Long =
      Option(gToken.getExpirationTime).map(d => java.lang.Long.valueOf(d.getTime)).orNull
    new AccessTokenProvider.AccessToken(gToken.getTokenValue, expirationMillis)
  }

  /** Forces a refresh of the underlying application default credentials. */
  override def refresh(): Unit = adc.refresh()

  /** Stores the Hadoop configuration handed to this provider. */
  override def setConf(c: Configuration): Unit = conf = Some(c)

  /** Returns the configuration passed to [[setConf]], or `null` if none has been set. */
  override def getConf: Configuration = conf.orNull
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy