/*
 * Copyright © 2021 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package io.cdap.plugin.gcp.bigquery.sqlengine;

import com.google.auth.Credentials;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.Dataset;
import com.google.cloud.bigquery.DatasetId;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.kms.v1.CryptoKeyName;
import com.google.cloud.storage.Storage;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
import io.cdap.cdap.api.SQLEngineContext;
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Metadata;
import io.cdap.cdap.api.annotation.MetadataProperty;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.api.metrics.Metrics;
import io.cdap.cdap.etl.api.PipelineConfigurer;
import io.cdap.cdap.etl.api.connector.Connector;
import io.cdap.cdap.etl.api.engine.sql.BatchSQLEngine;
import io.cdap.cdap.etl.api.engine.sql.SQLEngineException;
import io.cdap.cdap.etl.api.engine.sql.capability.DefaultPullCapability;
import io.cdap.cdap.etl.api.engine.sql.capability.PullCapability;
import io.cdap.cdap.etl.api.engine.sql.dataset.SQLDataset;
import io.cdap.cdap.etl.api.engine.sql.dataset.SQLDatasetProducer;
import io.cdap.cdap.etl.api.engine.sql.dataset.SQLPullDataset;
import io.cdap.cdap.etl.api.engine.sql.dataset.SQLPushDataset;
import io.cdap.cdap.etl.api.engine.sql.request.SQLJoinDefinition;
import io.cdap.cdap.etl.api.engine.sql.request.SQLJoinRequest;
import io.cdap.cdap.etl.api.engine.sql.request.SQLPullRequest;
import io.cdap.cdap.etl.api.engine.sql.request.SQLPushRequest;
import io.cdap.cdap.etl.api.engine.sql.request.SQLReadRequest;
import io.cdap.cdap.etl.api.engine.sql.request.SQLReadResult;
import io.cdap.cdap.etl.api.engine.sql.request.SQLRelationDefinition;
import io.cdap.cdap.etl.api.engine.sql.request.SQLTransformDefinition;
import io.cdap.cdap.etl.api.engine.sql.request.SQLTransformRequest;
import io.cdap.cdap.etl.api.engine.sql.request.SQLWriteRequest;
import io.cdap.cdap.etl.api.engine.sql.request.SQLWriteResult;
import io.cdap.cdap.etl.api.join.JoinCondition;
import io.cdap.cdap.etl.api.join.JoinDefinition;
import io.cdap.cdap.etl.api.join.JoinStage;
import io.cdap.cdap.etl.api.relational.Capability;
import io.cdap.cdap.etl.api.relational.Engine;
import io.cdap.cdap.etl.api.relational.ExpressionFactory;
import io.cdap.cdap.etl.api.relational.Relation;
import io.cdap.cdap.etl.api.relational.StringExpressionFactoryType;
import io.cdap.plugin.gcp.bigquery.connector.BigQueryConnector;
import io.cdap.plugin.gcp.bigquery.relational.BigQueryRelation;
import io.cdap.plugin.gcp.bigquery.relational.SQLExpressionFactory;
import io.cdap.plugin.gcp.bigquery.sink.BigQuerySinkUtils;
import io.cdap.plugin.gcp.bigquery.source.BigQuerySourceUtils;
import io.cdap.plugin.gcp.bigquery.sqlengine.builder.BigQueryJoinSQLBuilder;
import io.cdap.plugin.gcp.bigquery.sqlengine.util.BigQuerySQLEngineUtils;
import io.cdap.plugin.gcp.bigquery.util.BigQueryUtil;
import io.cdap.plugin.gcp.common.CmekUtils;
import io.cdap.plugin.gcp.common.GCPUtils;
import org.apache.avro.generic.GenericData;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
/**
 * SQL Engine implementation using BigQuery as the execution engine.
 */
@Plugin(type = BatchSQLEngine.PLUGIN_TYPE)
@Name(BigQuerySQLEngine.NAME)
@Description("BigQuery SQLEngine implementation, used to push down certain pipeline steps into BigQuery. "
  + "A GCS bucket is used as staging for the read/write operations performed by this engine. "
  + "BigQuery is Google's serverless, highly scalable, enterprise data warehouse.")
@Metadata(properties = {@MetadataProperty(key = Connector.PLUGIN_TYPE, value = BigQueryConnector.NAME)})
public class BigQuerySQLEngine
  extends BatchSQLEngine<LongWritable, GenericData.Record, StructuredRecord, NullWritable>
  implements Engine {

  private static final Logger LOG = LoggerFactory.getLogger(BigQuerySQLEngine.class);

  public static final String NAME = "BigQueryPushdownEngine";

  private final BigQuerySQLEngineConfig sqlEngineConfig;
  private SQLEngineContext ctx;
  private BigQuery bigQuery;
  private Storage storage;
  private Configuration configuration;
  private String project;
  private String datasetProject;
  private String datasetName;
  private String bucket;
  private String runId;
  private Map<String, BigQuerySQLDataset> datasets;
  private Metrics metrics;

  @SuppressWarnings("unused")
  public BigQuerySQLEngine(BigQuerySQLEngineConfig sqlEngineConfig) {
    this.sqlEngineConfig = sqlEngineConfig;
  }

  @Override
  public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
    super.configurePipeline(pipelineConfigurer);

    // Validate configuration and throw exception if the supplied configuration is invalid.
    sqlEngineConfig.validate(pipelineConfigurer.getStageConfigurer().getFailureCollector());
  }

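  /**
   * Prepares the engine for execution: validates the configuration, initializes the BigQuery and GCS clients,
   * resolves the staging bucket and CMEK key, and creates the BigQuery dataset and GCS bucket used for staging.
   */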
  @Override
  public void prepareRun(SQLEngineContext context) throws Exception {
    super.prepareRun(context);

    // Initialize context
    ctx = context;

    // Validate configuration and throw exception if the supplied configuration is invalid.
    sqlEngineConfig.validate();

    runId = BigQuerySQLEngineUtils.newIdentifier();
    datasets = new HashMap<>();

    String serviceAccount = sqlEngineConfig.getServiceAccount();
    Credentials credentials = serviceAccount == null ?
      null : GCPUtils.loadServiceAccountCredentials(serviceAccount, sqlEngineConfig.isServiceAccountFilePath());
    project = sqlEngineConfig.getProject();
    datasetProject = sqlEngineConfig.getDatasetProject();
    datasetName = sqlEngineConfig.getDataset();

    // Initialize BQ and GCS clients.
    bigQuery = GCPUtils.getBigQuery(project, credentials);
    storage = GCPUtils.getStorage(project, credentials);
    Dataset dataset = bigQuery.getDataset(DatasetId.of(datasetProject, datasetName));
    bucket = BigQueryUtil.getStagingBucketName(context.getRuntimeArguments(), sqlEngineConfig.getLocation(),
                                               dataset, sqlEngineConfig.getBucket());

    String cmekKey = !Strings.isNullOrEmpty(sqlEngineConfig.cmekKey) ? sqlEngineConfig.cmekKey :
      ctx.getRuntimeArguments().get(CmekUtils.CMEK_KEY);
    CryptoKeyName cmekKeyName = null;
    if (!Strings.isNullOrEmpty(cmekKey)) {
      cmekKeyName = CryptoKeyName.parse(cmekKey);
    }
    configuration = BigQueryUtil.getBigQueryConfig(sqlEngineConfig.getServiceAccount(), sqlEngineConfig.getProject(),
                                                   cmekKeyName, sqlEngineConfig.getServiceAccountType());
    // Configure GCS bucket that is used to stage temporary files.
    // If the bucket is created for this run, mark it for deletion after execution is completed
    String fallbackBucketName = "bqpushdown-" + runId;
    bucket = BigQuerySinkUtils.configureBucket(configuration, bucket, fallbackBucketName);

    // Create resources needed for this execution
    BigQuerySinkUtils.createResources(bigQuery, storage, DatasetId.of(datasetProject, datasetName), bucket,
                                      sqlEngineConfig.getLocation(), cmekKeyName);

    // Configure credentials for the source
    BigQuerySourceUtils.configureServiceAccount(configuration, sqlEngineConfig.connection);

    // Get metrics instance
    metrics = ctx.getMetrics();
  }

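  /**
   * Cleans up the GCS staging location once the run finishes. If the bucket was created for this run, the entire
   * bucket is deleted; otherwise, only the run-specific directory inside the user-supplied bucket is removed.
   */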
  @Override
  public void onRunFinish(boolean succeeded, SQLEngineContext context) {
    super.onRunFinish(succeeded, context);

    String gcsPath;
    // If the bucket was created for this run, we should delete it.
    // Otherwise, just clean the directory within the provided bucket.
    if (sqlEngineConfig.getBucket() == null) {
      gcsPath = String.format("gs://%s", bucket);
    } else {
      gcsPath = String.format(BigQuerySinkUtils.GS_PATH_FORMAT, bucket, runId);
    }
    try {
      BigQueryUtil.deleteTemporaryDirectory(configuration, gcsPath);
    } catch (IOException e) {
      LOG.warn("Failed to delete temporary directory '{}': {}", gcsPath, e.getMessage());
    }
  }

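  /**
   * Builds a push dataset used to upload records into a new BigQuery table for this run, using the GCS bucket
   * as a staging location, and registers the dataset with this engine.
   *
   * @throws SQLEngineException if the push dataset could not be created
   */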
  @Override
  public SQLPushDataset<StructuredRecord, StructuredRecord, NullWritable> getPushProvider(SQLPushRequest sqlPushRequest)
    throws SQLEngineException {
    try {
      BigQueryPushDataset pushDataset =
        BigQueryPushDataset.getInstance(sqlPushRequest,
                                        sqlEngineConfig,
                                        configuration,
                                        bigQuery,
                                        DatasetId.of(datasetProject, datasetName),
                                        bucket,
                                        runId);

      LOG.info("Executing Push operation for dataset {} stored in table {}",
               sqlPushRequest.getDatasetName(),
               pushDataset.getBigQueryTable());

      datasets.put(sqlPushRequest.getDatasetName(), pushDataset);
      return pushDataset;
    } catch (IOException ioe) {
      throw new SQLEngineException(ioe);
    }
  }

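  /**
   * Builds a pull dataset used to download records for a dataset that was previously pushed or computed by this
   * engine, using the GCS bucket as a staging location.
   *
   * @throws SQLEngineException if the dataset is not registered with this engine or the pull dataset could not be
   *   created
   */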
  @Override
  public SQLPullDataset<StructuredRecord, LongWritable, GenericData.Record> getPullProvider(
    SQLPullRequest sqlPullRequest) throws SQLEngineException {
    if (!datasets.containsKey(sqlPullRequest.getDatasetName())) {
      throw new SQLEngineException(String.format("Trying to pull non-existing dataset: '%s'",
                                                 sqlPullRequest.getDatasetName()));
    }

    String table = datasets.get(sqlPullRequest.getDatasetName()).getBigQueryTable();

    LOG.info("Executing Pull operation for dataset {} stored in table {}", sqlPullRequest.getDatasetName(), table);

    try {
      return BigQueryPullDataset.getInstance(sqlPullRequest,
                                             configuration,
                                             bigQuery,
                                             DatasetId.of(datasetProject, datasetName),
                                             table,
                                             bucket,
                                             runId);
    } catch (IOException ioe) {
      throw new SQLEngineException(ioe);
    }
  }

  @Override
  public boolean exists(String datasetName) throws SQLEngineException {
    return datasets.containsKey(datasetName);
  }

  @Override
  public boolean canJoin(SQLJoinDefinition sqlJoinDefinition) {
    boolean canJoin = isValidJoinDefinition(sqlJoinDefinition);
    LOG.info("Validating join for stage '{}' can be executed on BigQuery: {}",
             sqlJoinDefinition.getDatasetName(),
             canJoin);
    return canJoin;
  }

  @VisibleForTesting
  protected static boolean isValidJoinDefinition(SQLJoinDefinition sqlJoinDefinition) {
    List<String> validationProblems = new ArrayList<>();

    JoinDefinition joinDefinition = sqlJoinDefinition.getJoinDefinition();

    // Ensure none of the input schemas contains unsupported types or invalid stage names.
    for (JoinStage inputStage : joinDefinition.getStages()) {
      // Validate input stage schema and identifier
      BigQuerySQLEngineUtils.validateInputStage(inputStage, validationProblems);
    }

    // Ensure the output schema doesn't contain unsupported types
    BigQuerySQLEngineUtils.validateOutputSchema(joinDefinition.getOutputSchema(), validationProblems);

    // Ensure expression joins have valid aliases
    if (joinDefinition.getCondition().getOp() == JoinCondition.Op.EXPRESSION) {
      BigQuerySQLEngineUtils
        .validateOnExpressionJoinCondition((JoinCondition.OnExpression) joinDefinition.getCondition(),
                                           validationProblems);
    }

    // Validate join stages for join on keys
    if (joinDefinition.getCondition().getOp() == JoinCondition.Op.KEY_EQUALITY) {
      BigQuerySQLEngineUtils.validateJoinOnKeyStages(joinDefinition, validationProblems);
    }

    if (!validationProblems.isEmpty()) {
      LOG.warn("Join operation for stage '{}' could not be executed in BigQuery. Issues found: {}.",
               sqlJoinDefinition.getDatasetName(),
               String.join("; ", validationProblems));
    }

    return validationProblems.isEmpty();
  }


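  /**
   * Executes a join operation as a BigQuery SELECT statement and registers the resulting table as a new dataset.
   */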
  @Override
  public SQLDataset join(SQLJoinRequest sqlJoinRequest) throws SQLEngineException {
    // Get SQL builder for this Join operation
    BigQueryJoinSQLBuilder builder = new BigQueryJoinSQLBuilder(
      sqlJoinRequest.getJoinDefinition(),
      DatasetId.of(datasetProject, datasetName),
      getStageNameToBQTableNameMap());

    // Execute Select job with the supplied query.
    return executeSelect(sqlJoinRequest.getDatasetName(),
                         sqlJoinRequest.getJoinDefinition().getOutputSchema(),
                         BigQueryJobType.JOIN,
                         builder.getQuery());
  }

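  /**
   * Returns a producer that reads records directly from BigQuery into a Spark RDD using the Storage Read API,
   * or {@code null} if the Storage Read API is disabled or the requested capability is not supported.
   */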
  @Nullable
  @Override
  public SQLDatasetProducer getProducer(SQLPullRequest pullRequest, PullCapability capability) {
    // We only support the Spark RDD pull capability if the Storage Read API is enabled.
    if (!sqlEngineConfig.shouldUseStorageReadAPI() || !(capability == DefaultPullCapability.SPARK_RDD_PULL)) {
      return null;
    }

    String table = datasets.get(pullRequest.getDatasetName()).getBigQueryTable();

    return new BigQuerySparkDatasetProducer(sqlEngineConfig,
                                            datasetProject,
                                            datasetName,
                                            table,
                                            pullRequest.getDatasetSchema());
  }

  @Override
  public Set<PullCapability> getPullCapabilities() {
    // If the Storage Read API is not enabled, skip this.
    if (!sqlEngineConfig.shouldUseStorageReadAPI()) {
      return Collections.emptySet();
    }

    return Collections.singleton(DefaultPullCapability.SPARK_RDD_PULL);
  }

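  /**
   * Executes a direct read from a BigQuery source into a new table within the engine's dataset, skipping the
   * operation if the read request targets a different SQL engine.
   */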
  @Override
  public SQLReadResult read(SQLReadRequest readRequest) throws SQLEngineException {
    String datasetName = readRequest.getDatasetName();

    // TODO: implement direct source read toggle
    // Check if direct sink write is enabled. If not, skip.
//    if (!sqlEngineConfig.shouldUseDirectSinkWrite()) {
//      return SQLReadResult.unsupported(datasetName);
//    }

    // Check if this input matches the expected engine. If it doesn't, skip execution for this read operation.
    if (!BigQuerySQLEngine.class.getName().equals(readRequest.getInput().getSqlEngineClassName())) {
      LOG.debug("Got input for another SQL engine {}, skipping", readRequest.getInput().getSqlEngineClassName());
      return SQLReadResult.unsupported(datasetName);
    }

    // Get source table information (from the stage we are attempting to write into the sink)
    String destinationTable = BigQuerySQLEngineUtils.getNewTableName(runId);

    // Create empty table to store query results.
    TableId destinationTableId = TableId.of(datasetProject, this.datasetName, destinationTable);

    // Build Big Query Write instance and execute write operation.
    BigQueryReadDataset readDataset = BigQueryReadDataset.getInstance(datasetName,
                                                                      sqlEngineConfig,
                                                                      bigQuery,
                                                                      readRequest,
                                                                      destinationTableId,
                                                                      metrics);
    SQLReadResult result = readDataset.read();

    if (result.isSuccessful()) {
      datasets.put(datasetName, readDataset);
    }

    return result;
  }

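  /**
   * Executes a direct write from a table managed by this engine into a BigQuery sink, skipping the operation if
   * the write request targets a different SQL engine.
   */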
  @Override
  public SQLWriteResult write(SQLWriteRequest writeRequest) {
    String datasetName = writeRequest.getDatasetName();

    // Check if this output matches the expected engine. If it doesn't, skip execution for this write operation.
    if (!BigQuerySQLEngine.class.getName().equals(writeRequest.getOutput().getSqlEngineClassName())) {
      LOG.debug("Got output for another SQL engine {}, skipping", writeRequest.getOutput().getSqlEngineClassName());
      return SQLWriteResult.unsupported(datasetName);
    }

    // Get source table information (from the stage we are attempting to write into the sink)
    String sourceTable = datasets.get(writeRequest.getDatasetName()).getBigQueryTable();
    TableId sourceTableId = TableId.of(datasetProject, this.datasetName, sourceTable);

    // Build Big Query Write instance and execute write operation.
    BigQueryWrite bigQueryWrite = BigQueryWrite.getInstance(datasetName,
                                                            sqlEngineConfig,
                                                            bigQuery,
                                                            writeRequest,
                                                            sourceTableId,
                                                            metrics);
    return bigQueryWrite.write();
  }

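  /**
   * Cleans up all resources associated with a dataset: cancels any running BigQuery job, deletes the temporary
   * table (unless tables are retained), and removes the temporary GCS folder. Failures are collected and rethrown
   * as a single {@link SQLEngineException}.
   */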
  @Override
  public void cleanup(String datasetName) throws SQLEngineException {

    BigQuerySQLDataset bqDataset = datasets.get(datasetName);
    if (bqDataset == null) {
      return;
    }

    LOG.info("Cleaning up dataset {}", datasetName);

    SQLEngineException ex = null;

    // Cancel BQ job
    try {
      cancelJob(datasetName, bqDataset);
    } catch (BigQueryException e) {
      LOG.error("Exception when cancelling BigQuery job '{}' for stage '{}': {}",
                bqDataset.getJobId(), datasetName, e.getMessage());
      ex = new SQLEngineException(String.format("Exception when executing cleanup for stage '%s'", datasetName), e);
    }

    // Delete BQ Table
    try {
      deleteTable(datasetName, bqDataset);
    } catch (BigQueryException e) {
      LOG.error("Exception when deleting BigQuery table '{}' for stage '{}': {}",
                bqDataset.getBigQueryTable(), datasetName, e.getMessage());
      if (ex == null) {
        ex = new SQLEngineException(String.format("Exception when executing cleanup for stage '%s'", datasetName), e);
      } else {
        ex.addSuppressed(e);
      }
    }

    // Delete temporary folder
    try {
      deleteTempFolder(bqDataset);
    } catch (IOException e) {
      LOG.error("Failed to delete temporary directory '{}' for stage '{}': {}",
                bqDataset.getGCSPath(), datasetName, e.getMessage());
      if (ex == null) {
        ex = new SQLEngineException(String.format("Exception when executing cleanup for stage '%s'", datasetName), e);
      } else {
        ex.addSuppressed(e);
      }
    }

    // Throw all collected exceptions, if any.
    if (ex != null) {
      throw ex;
    }
  }

  /**
   * @return capabilities provided by {@link BigQuerySQLEngine}, which include {@link StringExpressionFactoryType}.SQL.
   */
  @Override
  public Set<Capability> getCapabilities() {
    return Collections.singleton(StringExpressionFactoryType.SQL);
  }

  /**
   * @return the single expression factory provided by {@link BigQuerySQLEngine}, which is {@link SQLExpressionFactory}.
   */
  @Override
  public List<ExpressionFactory<?>> getExpressionFactories() {
    return Collections.singletonList(new SQLExpressionFactory());
  }

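  /**
   * Builds a {@link BigQueryRelation} for the supplied relation definition, preserving the field order defined
   * by the schema.
   */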
  @Override
  public Relation getRelation(SQLRelationDefinition relationDefinition) {
    // Builds set of columns to be used for this relation based on the schema. This set maintains field order
    Set<String> columnSet = new LinkedHashSet<>();
    List<Schema.Field> fields = relationDefinition.getSchema().getFields();
    if (fields != null) {
      for (Schema.Field field : fields) {
        columnSet.add(field.getName());
      }
    }
    String datasetName = relationDefinition.getDatasetName();

    return BigQueryRelation.getInstance(datasetName, columnSet, ctx);
  }

  @Override
  public Engine getRelationalEngine() {
    return this;
  }

  @Override
  public boolean supportsRelationalTranform() {
    return true;
  }

  @Override
  public boolean supportsInputSchema(Schema schema) {
    return BigQuerySQLEngineUtils.isSupportedSchema(schema);
  }

  @Override
  public boolean supportsOutputSchema(Schema schema) {
    return BigQuerySQLEngineUtils.isSupportedSchema(schema);
  }

  @Override
  public Set<String> getIncludedStageNames() {
    return sqlEngineConfig.getIncludedStages();
  }

  @Override
  public Set<String> getExcludedStageNames() {
    return sqlEngineConfig.getExcludedStages();
  }

  @Override
  public boolean canTransform(SQLTransformDefinition transformDefinition) {
    Relation relation = transformDefinition.getOutputRelation();
    return relation instanceof BigQueryRelation && relation.isValid();
  }

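  /**
   * Executes a relational transform by running the SQL statement generated by the output {@link BigQueryRelation}
   * and registering the resulting table as a new dataset.
   */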
  @Override
  public SQLDataset transform(SQLTransformRequest context) throws SQLEngineException {
    // Get relation instance
    BigQueryRelation relation = (BigQueryRelation) context.getOutputRelation();

    // Set input datasets
    Map<String, BigQuerySQLDataset> bqDatasets = context.getInputDataSets().entrySet()
      .stream()
      .collect(Collectors.toMap(
        Map.Entry::getKey,
        e -> (BigQuerySQLDataset) e.getValue()));

    // Set input datasets for relation.
    relation.setInputDatasets(bqDatasets);

    // Execute select with the generated expression.
    return executeSelect(context.getOutputDatasetName(),
                         context.getOutputSchema(),
                         BigQueryJobType.TRANSFORM,
                         relation.getSQLStatement());
  }

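  /**
   * Runs a SELECT statement in BigQuery, stores the results in a newly created table, and registers the resulting
   * dataset under the supplied name.
   *
   * @param datasetName name under which the resulting dataset is registered
   * @param outputSchema schema of the query output
   * @param jobType type of job, used for metrics and logging
   * @param query the SQL statement to execute
   * @return the dataset backed by the table that contains the query results
   */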
  private BigQuerySelectDataset executeSelect(String datasetName,
                                              Schema outputSchema,
                                              BigQueryJobType jobType,
                                              String query) {
    LOG.info("Executing {} operation for dataset {}", jobType.getType(), datasetName);

    // Get new Job ID for this push operation
    String jobId = BigQuerySQLEngineUtils.newIdentifier();

    // Build new table name for this dataset
    String table = BigQuerySQLEngineUtils.getNewTableName(runId);

    // Create empty table to store query results.
    BigQuerySQLEngineUtils.createEmptyTable(sqlEngineConfig, bigQuery, project, this.datasetName, table);

    BigQuerySelectDataset selectDataset = BigQuerySelectDataset.getInstance(
      datasetName,
      outputSchema,
      sqlEngineConfig,
      bigQuery,
      project,
      DatasetId.of(datasetProject, this.datasetName),
      table,
      jobId,
      jobType,
      query,
      metrics
    ).execute();

    datasets.put(datasetName, selectDataset);

    LOG.info("Executed {} operation for dataset {}", jobType.getType(), datasetName);
    return selectDataset;
  }

  /**
   * Get a map that contains stage names as keys and BigQuery tables as Values.
   *
   * @return map representing all stages currently pushed to BQ.
   */
  protected Map<String, String> getStageNameToBQTableNameMap() {
    return datasets.entrySet()
      .stream()
      .collect(Collectors.toMap(
        Map.Entry::getKey,
        e -> e.getValue().getBigQueryTable()
      ));
  }

  /**
   * Stops the running job for the supplied dataset
   *
   * @param stageName the name of the stage in CDAP
   * @param bqDataset the BigQuery Dataset Instance
   */
  protected void cancelJob(String stageName, BigQuerySQLDataset bqDataset) throws BigQueryException {
    // Skip cancellation if tables need to be retained.
    if (sqlEngineConfig.shouldRetainTables()) {
      return;
    }

    String jobId = bqDataset.getJobId();

    // If this dataset does not specify a job ID, there's no need to cancel any job
    if (jobId == null) {
      return;
    }

    String tableName = bqDataset.getBigQueryTable();
    Job job = bigQuery.getJob(jobId);

    if (job == null) {
      return;
    }

    if (!job.cancel()) {
      LOG.error("Unable to cancel BigQuery job '{}' for table '{}' and stage '{}'", jobId, tableName, stageName);
    }
  }

  /**
   * Deletes the BigQuery table for the supplied dataset
   *
   * @param stageName the name of the stage in CDAP
   * @param bqDataset the BigQuery Dataset Instance
   */
  protected void deleteTable(String stageName, BigQuerySQLDataset bqDataset) throws BigQueryException {
    // Skip deletion if tables need to be retained.
    if (sqlEngineConfig.shouldRetainTables()) {
      return;
    }

    String tableName = bqDataset.getBigQueryTable();
    TableId tableId = TableId.of(datasetProject, datasetName, tableName);

    // Delete this table if found
    if (bigQuery.getTable(tableId) != null) {
      bigQuery.delete(tableId);
    }
  }

  /**
   * Deletes the temporary folder used by a certain BQ dataset.
   *
   * @param bqDataset the BigQuery Dataset Instance
   */
  protected void deleteTempFolder(BigQuerySQLDataset bqDataset) throws IOException {
    String gcsPath = bqDataset.getGCSPath();

    // If this dataset does not use temporary storage, skip this step
    if (gcsPath == null) {
      return;
    }

    BigQueryUtil.deleteTemporaryDirectory(configuration, gcsPath);
  }

}