All Downloads are FREE. Search and download functionalities are using the official Maven repository.

no.nrk.bigquery.BigQueryTransferClient.scala Maven / Gradle / Ivy

/*
 * Copyright 2020 NRK
 *
 * SPDX-License-Identifier: MIT
 */

package no.nrk.bigquery

import cats.effect.IO
import cats.effect.kernel.Resource
import cats.syntax.traverse._
import com.google.api.gax.core.FixedCredentialsProvider
import com.google.auth.oauth2.ServiceAccountCredentials
import com.google.cloud.bigquery.datatransfer.v1._
import com.google.protobuf.{Struct, Value}
import no.nrk.bigquery.BigQueryTransferClient.{TransferConfigFailed, TransferFailed, TransferStatus, TransferSucceeded}
import org.typelevel.log4cats.slf4j.Slf4jFactory

import scala.concurrent.duration.{DurationInt, FiniteDuration}
import scala.jdk.CollectionConverters._

class BigQueryTransferClient(transferClient: DataTransferServiceClient) {
  protected lazy val logger = Slf4jFactory.create[IO].getLogger

  def getTransferConfig(
      transferConfigName: TransferConfigName
  ): IO[Option[TransferConfig]] =
    IO.interruptible(
      transferClient.listTransferConfigs(
        s"projects/${transferConfigName.getProject}/locations/${transferConfigName.getLocation}"
      )
    ).map(
      _.iterateAll().asScala.find(config => config.getDisplayName == transferConfigName.getTransferConfig)
    )

  def buildTransferConfig(
      sourceDatasetId: BQDataset,
      destinationDatasetId: BQDataset,
      transferConfigName: TransferConfigName
  ): TransferConfig =
    TransferConfig.newBuilder
      .setDisplayName(transferConfigName.getTransferConfig)
      .setDestinationDatasetId(destinationDatasetId.id)
      .setDataSourceId("cross_region_copy")
      .setDatasetRegion(transferConfigName.getLocation)
      .setScheduleOptions(
        ScheduleOptions.newBuilder().setDisableAutoScheduling(true)
      )
      .setParams(
        Struct.newBuilder
          .putFields(
            "source_project_id",
            Value.newBuilder.setStringValue(sourceDatasetId.project.value).build
          )
          .putFields(
            "source_dataset_id",
            Value.newBuilder.setStringValue(sourceDatasetId.id).build
          )
          .build
      )
      .build

  def createTransferConfig(
      projectId: String,
      transferConfig: TransferConfig
  ): IO[TransferConfig] = {
    val projectName = ProjectName.of(projectId)
    val transferRequest = CreateTransferConfigRequest.newBuilder
      .setParent(projectName.toString)
      .setTransferConfig(transferConfig)
      .build
    IO.interruptible(transferClient.createTransferConfig(transferRequest))
  }

  def startDatasetTransfer(transferConfig: TransferConfig): IO[TransferStatus] =
    for {
      _ <- logger.info(
        s"Starting a run in transferconfig ${transferConfig.getDisplayName}"
      )
      startRequest = StartManualTransferRunsRequest.newBuilder
        .setParent(transferConfig.getName)
        .setRequestedRunTime(transferConfig.getUpdateTime)
        .build()
      runResponse <- IO.interruptible(
        transferClient.startManualTransferRuns(startRequest)
      )
      activeRun = runResponse.getRunsList.asScala.headOption
      transferStatus <- activeRun.traverse(activeRun =>
        pollDatasetTransfer(activeRun, 15.seconds)(retry =
          IO.interruptible(transferClient.getTransferRun(activeRun.getName))).attempt.map {
          case Left(err) => TransferFailed(activeRun, err)
          case Right(transfer) => transfer
        })
    } yield transferStatus match {
      case Some(transfer) => transfer
      case None => TransferConfigFailed(transferConfig)
    }

  private def pollDatasetTransfer(
      runningTransfer: TransferRun,
      baseDelay: FiniteDuration
  )(
      retry: IO[TransferRun]
  ): IO[TransferStatus] = {
    def waitAndPoll: IO[TransferStatus] =
      IO.sleep(baseDelay) >> retry.flatMap(running => go(running))

    def go(runningTransfer: TransferRun): IO[TransferStatus] =
      runningTransfer.getState match {
        case TransferState.PENDING =>
          logger.info(s"${runningTransfer.getName} is pending.") >> waitAndPoll
        case TransferState.RUNNING =>
          logger.info(s"${runningTransfer.getName} is running.") >> waitAndPoll
        case TransferState.SUCCEEDED =>
          logger.info(s"${runningTransfer.getName} succeeded.") >> IO.pure(
            TransferSucceeded(runningTransfer)
          )

        case TransferState.FAILED =>
          logger.error(s"${runningTransfer.getName} failed.") >> IO.pure(
            TransferFailed(
              runningTransfer,
              new Exception(
                s"${runningTransfer.getName} failed for unknown reason."
              )
            )
          )
        case TransferState.CANCELLED =>
          logger.error(s"${runningTransfer.getName} cancelled.") >> IO.pure(
            TransferFailed(
              runningTransfer,
              new Exception(
                s"${runningTransfer.getName} cancelled for unknown reason."
              )
            )
          )

        case TransferState.TRANSFER_STATE_UNSPECIFIED =>
          logger.info(
            s"${runningTransfer.getName} in unspecified state."
          ) >> waitAndPoll
        case TransferState.UNRECOGNIZED =>
          logger.info(
            s"${runningTransfer.getName} in unrecognized state."
          ) >> waitAndPoll

      }

    IO.sleep(baseDelay) >> go(runningTransfer)
  }
}

object BigQueryTransferClient {
  sealed trait TransferStatus
  case class TransferSucceeded(transferRun: TransferRun) extends TransferStatus
  case class TransferFailed(transferRun: TransferRun, error: Throwable) extends TransferStatus
  case class TransferConfigFailed(transferConfig: TransferConfig) extends TransferStatus

  def resource(
      credentials: ServiceAccountCredentials
  ): Resource[IO, BigQueryTransferClient] = {
    val dataTransferServiceClient = IO.blocking(
      DataTransferServiceClient.create(
        DataTransferServiceSettings
          .newBuilder()
          .setCredentialsProvider(FixedCredentialsProvider.create(credentials))
          .build()
      )
    )
    Resource
      .fromAutoCloseable(dataTransferServiceClient)
      .map(new BigQueryTransferClient(_))
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy