Please wait. This can take a few minutes ...
Downloading a project requires many resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
io.cdap.plugin.gcp.bigquery.action.BigQueryArgumentSetterConfig Maven / Gradle / Ivy
/*
* Copyright © 2020 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package io.cdap.plugin.gcp.bigquery.action;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.FieldList;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.QueryParameterValue;
import com.google.cloud.bigquery.StandardTableDefinition;
import com.google.cloud.bigquery.Table;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Macro;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.etl.api.FailureCollector;
import io.cdap.plugin.common.ConfigUtil;
import io.cdap.plugin.gcp.bigquery.source.BigQuerySource;
import io.cdap.plugin.gcp.bigquery.util.BigQueryUtil;
import java.util.AbstractMap.SimpleEntry;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
/**
* Holds configuration required for configuring {@link BigQuerySource}.
*/
public final class BigQueryArgumentSetterConfig extends AbstractBigQueryActionConfig {
public static final String NAME_DATASET = "dataset";
public static final String NAME_TABLE = "table";
public static final String NAME_DATASET_PROJECT = "datasetProject";
public static final String NAME_ARGUMENT_SELECTION_CONDITIONS = "argumentSelectionConditions";
public static final String NAME_ARGUMENTS_COLUMNS = "argumentsColumns";
private static final String QUERY_TEMPLATE = "Select %s from %s where %s";
@Name(NAME_DATASET)
@Macro
@Description(
"The dataset the table belongs to. A dataset is contained within a specific project. "
+ "Datasets are top-level containers that are used to organize and control access to tables and views.")
private String dataset;
@Name(NAME_TABLE)
@Macro
@Description(
"The table to read from. A table contains individual records organized in rows. "
+ "Each record is composed of columns (also called fields). "
+ "Every table is defined by a schema that describes the column names, data types, and other information.")
private String table;
@Name(NAME_ARGUMENT_SELECTION_CONDITIONS)
@Macro
@Description(
"A set of conditions for identifying the arguments to run a pipeline."
+ "A particular use case for this would be feed=marketing;date=20200427.")
private String argumentSelectionConditions;
@Name(NAME_ARGUMENTS_COLUMNS)
@Macro
@Description(
"Name of the columns, separated by `,` ,that contains the arguments for this run."
+ "A particular use case for this would be country, device")
private String argumentsColumns;
private BigQueryArgumentSetterConfig(
String datasetProject,
String dataset,
String table,
String argumentSelectionConditions,
String argumentsColumns) {
this.datasetProject = datasetProject;
this.dataset = dataset;
this.table = table;
this.argumentSelectionConditions = argumentSelectionConditions;
this.argumentsColumns = argumentsColumns;
}
public String getDataset() {
return dataset;
}
public String getTable() {
return table;
}
public String getArgumentSelectionConditions() {
return argumentSelectionConditions;
}
@Nullable
public String getArgumentsColumns() {
return argumentsColumns;
}
@Override
public void validate(FailureCollector collector) {
validateProperties(collector);
if (canConnect()) {
getQueryJobConfiguration(collector);
}
}
public void validateProperties(FailureCollector collector) {
if (!containsMacro(NAME_DATASET)) {
BigQueryUtil.validateDataset(dataset, NAME_DATASET, collector);
}
if (!containsMacro(NAME_TABLE)) {
BigQueryUtil.validateTable(table, NAME_TABLE, collector);
}
validateArgumentsSelectionConditions(getArgumentSelectionConditions(), collector);
validateArgumentsColumns(argumentsColumns, collector);
collector.getOrThrowException();
}
private boolean canConnect() {
return !containsMacro(NAME_DATASET)
&& !containsMacro(NAME_TABLE)
&& !containsMacro(NAME_DATASET_PROJECT)
&& !containsMacro(NAME_SERVICE_ACCOUNT_TYPE)
&& !(containsMacro(NAME_SERVICE_ACCOUNT_FILE_PATH) || containsMacro(NAME_SERVICE_ACCOUNT_JSON))
&& !containsMacro(NAME_PROJECT)
&& !containsMacro(NAME_ARGUMENT_SELECTION_CONDITIONS)
&& !containsMacro(argumentsColumns);
}
private void validateArgumentsSelectionConditions(
String argumentSelectionConditions, FailureCollector collector) {
if (containsMacro(NAME_ARGUMENT_SELECTION_CONDITIONS)) {
return;
}
if (Strings.isNullOrEmpty(argumentSelectionConditions)) {
collector.addFailure("Arguments Selection Conditions is empty.",
"Arguments Selection condition can not be empty.")
.withConfigProperty(NAME_ARGUMENT_SELECTION_CONDITIONS);
return;
}
try {
getArgumentSelectionConditionsMap();
} catch (Exception e) {
collector.addFailure(e.getMessage(), "Invalid key value pair for Argument Selection Conditions.")
.withConfigProperty(NAME_ARGUMENT_SELECTION_CONDITIONS);;
}
}
private void validateArgumentsColumns(String argumentsColumns, FailureCollector collector) {
if (containsMacro(NAME_ARGUMENTS_COLUMNS)) {
return;
}
if (Strings.isNullOrEmpty(argumentsColumns)) {
collector.addFailure("Arguments Columns is empty.", "Arguments Columns can not be empty.")
.withConfigProperty(NAME_ARGUMENTS_COLUMNS);
}
}
public QueryJobConfiguration getQueryJobConfiguration(FailureCollector collector) {
Table sourceTable = BigQueryUtil.getBigQueryTable(getDatasetProject(), dataset, table, getServiceAccount(),
isServiceAccountFilePath(), collector);
if (sourceTable == null) {
// Table does not exist
collector.addFailure(String.format("BigQuery table '%s:%s.%s' does not exist.",
getDatasetProject(), dataset, table),
"Ensure correct table name is provided.")
.withConfigProperty(NAME_TABLE);
throw collector.getOrThrowException();
}
StandardTableDefinition tableDefinition = Objects.requireNonNull(sourceTable).getDefinition();
FieldList fields = Objects.requireNonNull(tableDefinition.getSchema()).getFields();
Map argumentConditionMap = getArgumentSelectionConditionsMap();
Map argumentConditionFields = extractArgumentsFields(fields, argumentConditionMap);
checkIfArgumentsColumnsExitsInSource(argumentConditionMap, argumentConditionFields);
checkIfArgumentsColumnsListExistsInSource(getArgumentsColumnsList(), fields, collector);
String selectClause = getSelectClause();
String whereCondition = getWhereCondition(argumentConditionMap.keySet());
String tableName = dataset + "." + table;
String query = String.format(QUERY_TEMPLATE, selectClause, tableName, whereCondition);
QueryJobConfiguration.Builder queryJobConfiguration = QueryJobConfiguration.newBuilder(query);
getParametersValues(argumentConditionMap.entrySet(), argumentConditionFields)
.forEach(
stringQueryParameterValueSimpleEntry ->
queryJobConfiguration.addNamedParameter(
stringQueryParameterValueSimpleEntry.getKey(),
stringQueryParameterValueSimpleEntry.getValue()));
return queryJobConfiguration.build();
}
private void checkIfArgumentsColumnsExitsInSource(Map argumentConditionMap,
Map argumentConditionFields) {
if (argumentConditionMap.size() == argumentConditionFields.size()) {
return;
}
String nonExistingColumnNames = argumentConditionMap.keySet().stream()
.filter(columnName -> !argumentConditionFields.containsKey(columnName))
.collect(Collectors.joining(" ,"));
throw new RuntimeException(String.format(
"Columns: \" %s \"do not exist in table. Argument selections columns must exist in table.",
nonExistingColumnNames));
}
static void checkIfArgumentsColumnsListExistsInSource(
List argumentsColumnsList, FieldList fields, FailureCollector collector) {
Set fieldsNames = fields.stream().map(Field::getName).collect(Collectors.toSet());
List nonExistingColumns = argumentsColumnsList.stream()
.filter(columnName -> !fieldsNames.contains(columnName))
.collect(Collectors.toList());
if (!nonExistingColumns.isEmpty()) {
String formattedNonExistingColumnsList = String.join(" ,", nonExistingColumns);
String[] oneManyColumn = {"Column", "Columns"};
String[] oneManyDo = {"does", "do"};
int oneManyIndex = nonExistingColumns.size() == 1 ? 0 : 1;
collector.addFailure(
String.format("%s: \"%s\" %s not exist in table. Argument columns must exist in table.",
oneManyColumn[oneManyIndex], formattedNonExistingColumnsList, oneManyDo[oneManyIndex]),
"Ensure correct column names are provided.")
.withConfigProperty(NAME_ARGUMENTS_COLUMNS);
}
}
private Map extractArgumentsFields(
FieldList fields, Map argumentConditionKeyPair) {
return fields.stream()
.filter(field -> argumentConditionKeyPair.containsKey(field.getName()))
.collect(Collectors.toMap(Field::getName, Function.identity()));
}
private String getSelectClause() {
return String.join(" ,", getArgumentsColumnsList());
}
private List> getParametersValues(
Set> argumentConditionKeyPair,
Map argumentConditionFields) {
return argumentConditionKeyPair.stream()
.map(
entry -> {
Field field = argumentConditionFields.get(entry.getKey());
String value = entry.getValue();
QueryParameterValue build =
QueryParameterValue.newBuilder()
.setType(field.getType().getStandardType())
.setValue(value)
.build();
return new SimpleEntry<>(entry.getKey(), build);
})
.collect(Collectors.toList());
}
private String getWhereCondition(Set argumentConditionKey) {
return argumentConditionKey.stream()
.map(columnName -> String.format("%s = @%s ", columnName, columnName))
.collect(Collectors.joining(" AND "));
}
private Map getArgumentSelectionConditionsMap() {
return ConfigUtil.parseKeyValueConfig(argumentSelectionConditions, ";", "=");
}
private List getArgumentsColumnsList() {
String[] parts = getArgumentsColumns().split(",");
return Lists.newArrayList(parts);
}
public static Builder builder() {
return new Builder();
}
/**
* Builder for testing BigQueryArgumentSetterConfig
*/
public static class Builder {
private String dataset;
private String datasetProject;
private String table;
private String argumentSelectionConditions;
private String argumentsColumns;
private Builder() {
}
public Builder setDatasetProject(String datasetProject) {
this.datasetProject = datasetProject;
return this;
}
public Builder setDataset(String dataset) {
this.dataset = dataset;
return this;
}
public Builder setTable(String table) {
this.table = table;
return this;
}
public Builder setArgumentSelectionConditions(
String argumentSelectionConditions) {
this.argumentSelectionConditions = argumentSelectionConditions;
return this;
}
public Builder setArgumentsColumns(String argumentsColumns) {
this.argumentsColumns = argumentsColumns;
return this;
}
public BigQueryArgumentSetterConfig build() {
return new BigQueryArgumentSetterConfig(datasetProject, dataset, table,
argumentSelectionConditions, argumentsColumns);
}
}
}