
io.cdap.plugin.gcp.bigquery.action.BigQueryArgumentSetter

/*
 * Copyright © 2020 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package io.cdap.plugin.gcp.bigquery.action;

import com.google.auth.Credentials;
import com.google.cloud.StringEnumValue;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.FieldValue;
import com.google.cloud.bigquery.FieldValueList;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobId;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.TableResult;
import com.google.common.collect.ImmutableSet;
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.etl.api.action.Action;
import io.cdap.cdap.etl.api.action.ActionContext;
import io.cdap.plugin.gcp.common.GCPUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Set;
import java.util.stream.Collectors;

/**
 * This class <code>BigQueryArgumentSetter</code> executes a single Cloud BigQuery SQL query.
 *
 * The plugin provides the ability to map column names to pipeline argument names and column
 * values to pipeline argument values.
 */
@Plugin(type = Action.PLUGIN_TYPE)
@Name(BigQueryArgumentSetter.NAME)
@Description("Argument setter for dynamically configuring pipeline from BigQuery table.")
public final class BigQueryArgumentSetter extends AbstractBigQueryAction {

  private static final Logger LOG = LoggerFactory.getLogger(BigQueryArgumentSetter.class);
  public static final String NAME = "BigQueryArgumentSetter";
  private static final Set<LegacySQLTypeName> SUPPORTED_SQL_TYPES = ImmutableSet.of(
    LegacySQLTypeName.BOOLEAN, LegacySQLTypeName.STRING, LegacySQLTypeName.FLOAT,
    LegacySQLTypeName.INTEGER, LegacySQLTypeName.NUMERIC, LegacySQLTypeName.TIMESTAMP);

  private BigQueryArgumentSetterConfig config;

  @Override
  public AbstractBigQueryActionConfig getConfig() {
    return config;
  }

  @Override
  public void run(ActionContext context) throws Exception {
    config.validate(context.getFailureCollector());
    QueryJobConfiguration queryConfig = config.getQueryJobConfiguration(context.getFailureCollector());
    JobId jobId = JobId.newBuilder().setRandomJob().build();

    // API request - starts the query.
    Credentials credentials = config.getServiceAccount() == null
      ? null
      : GCPUtils.loadServiceAccountCredentials(config.getServiceAccount(),
                                               config.isServiceAccountFilePath());
    BigQuery bigQuery = GCPUtils.getBigQuery(config.getProject(), credentials);
    Job queryJob = bigQuery.create(JobInfo.newBuilder(queryConfig).setJobId(jobId).build());

    LOG.info("Executing SQL as job {}.", jobId.getJob());
    LOG.debug("The BigQuery SQL {}", queryConfig.getQuery());

    // Wait for the query to complete.
    queryJob.waitFor();

    // Check for errors.
    if (queryJob.getStatus().getError() != null) {
      throw new RuntimeException(queryJob.getStatus().getExecutionErrors().toString());
    }

    // The query must return exactly one row; its columns become the pipeline arguments.
    TableResult queryResults = queryJob.getQueryResults();
    if (queryResults.getTotalRows() == 0 || queryResults.getTotalRows() > 1) {
      throw new RuntimeException(String.format("The query result total rows should be \"1\" but is \"%d\"",
                                               queryResults.getTotalRows()));
    }

    Schema schema = queryResults.getSchema();
    FieldValueList row = queryResults.iterateAll().iterator().next();

    for (int i = 0; i < schema.getFields().size(); i++) {
      Field field = schema.getFields().get(i);
      if (!SUPPORTED_SQL_TYPES.contains(field.getType())) {
        context.getFailureCollector().addFailure(
          String.format("Field '%s' with type '%s' is not supported.",
                        field.getName(), field.getType().name()),
          String.format("Supported types are: %s",
                        SUPPORTED_SQL_TYPES.stream().map(StringEnumValue::name)
                          .collect(Collectors.joining(","))));
        context.getFailureCollector().getOrThrowException();
      }
      String name = field.getName();
      FieldValue fieldValue = row.get(name);
      // For type LegacySQLTypeName.TIMESTAMP the string value is seconds since epoch
      // (e.g. 1408452095.22 == 2014-08-19 07:41:35.220 -05:00).
      context.getArguments().set(name, fieldValue.getStringValue());
    }
  }
}
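For context, the run() method above enforces that the configured query returns exactly one row, whose column names become argument names and whose column values become argument values. The following is a minimal standalone sketch of that same single-row pattern using the BigQuery Java client; the class name and query text are hypothetical illustrations, not part of the plugin, and it uses application default credentials rather than the plugin's GCPUtils path.

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.FieldValueList;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.TableResult;

public class SingleRowArgumentQuery {
  public static void main(String[] args) throws InterruptedException {
    // Application default credentials; the plugin instead resolves a service account.
    BigQuery bigQuery = BigQueryOptions.getDefaultInstance().getService();
    // Hypothetical query: each selected column would become one pipeline argument.
    QueryJobConfiguration queryConfig =
        QueryJobConfiguration.of("SELECT 'events' AS source_table, 500 AS batch_size");
    TableResult result = bigQuery.query(queryConfig);
    // Same contract the plugin enforces: exactly one row.
    if (result.getTotalRows() != 1) {
      throw new IllegalStateException("Argument query must return exactly one row");
    }
    FieldValueList row = result.iterateAll().iterator().next();
    System.out.println("source_table = " + row.get("source_table").getStringValue());
    System.out.println("batch_size   = " + row.get("batch_size").getStringValue());
  }
}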
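The comment on TIMESTAMP columns notes that the value is passed along as a seconds-since-epoch string (possibly with a fractional part). A small sketch of how a downstream consumer might convert that string back into a java.time.Instant; the class name is illustrative only.

import java.math.BigDecimal;
import java.time.Instant;

public class TimestampArgumentDemo {
  public static void main(String[] args) {
    String raw = "1408452095.22";                      // value as set by the plugin
    BigDecimal seconds = new BigDecimal(raw);
    long whole = seconds.longValue();                  // whole seconds since epoch
    long nanos = seconds.subtract(BigDecimal.valueOf(whole))
        .movePointRight(9).longValue();                // fractional part as nanoseconds
    Instant instant = Instant.ofEpochSecond(whole, nanos);
    System.out.println(instant);                       // 2014-08-19T12:41:35.220Z
  }
}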




