io.cdap.plugin.gcp.spanner.sink.SpannerSinkConfig Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of google-cloud Show documentation
Show all versions of google-cloud Show documentation
Plugins for Google Big Query
/*
* Copyright © 2018 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package io.cdap.plugin.gcp.spanner.sink;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Macro;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.api.plugin.PluginConfig;
import io.cdap.cdap.etl.api.FailureCollector;
import io.cdap.plugin.common.ConfigUtil;
import io.cdap.plugin.common.Constants;
import io.cdap.plugin.common.IdUtils;
import io.cdap.plugin.gcp.common.CmekUtils;
import io.cdap.plugin.gcp.common.GCPConnectorConfig;
import io.cdap.plugin.gcp.spanner.common.SpannerUtil;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
/**
* Spanner sink config
*/
public class SpannerSinkConfig extends PluginConfig {
private static final int DEFAULT_SPANNER_WRITE_BATCH_SIZE = 100;
private static final Set SUPPORTED_TYPES =
ImmutableSet.of(Schema.Type.BOOLEAN, Schema.Type.STRING, Schema.Type.INT, Schema.Type.LONG,
Schema.Type.FLOAT, Schema.Type.DOUBLE, Schema.Type.BYTES, Schema.Type.ARRAY);
public static final String NAME_TABLE = "table";
public static final String NAME_BATCH_SIZE = "batchSize";
public static final String NAME_INSTANCE = "instance";
public static final String NAME_DATABASE = "database";
public static final String NAME_KEYS = "keys";
public static final String NAME_SCHEMA = "schema";
private static final String NAME_CMEK_KEY = "cmekKey";
@Name(NAME_TABLE)
@Description("Cloud Spanner table id. Uniquely identifies your table within the Cloud Spanner database."
+ " Note: A table with no primary key columns can have only one row."
+ " Only GoogleSQL-dialect databases can have tables without a primary key."
+ " Please refer to https://cloud.google.com/spanner/docs/schema-and-data-model for more details.")
@Macro
private String table;
@Name(NAME_BATCH_SIZE)
@Description("Size of the batched writes to the Spanner table. " +
"When the number of buffered mutations is greater than this batchSize, " +
"the mutations are written to Spanner table, Default value is 100")
@Macro
@Nullable
private Integer batchSize;
@Name(NAME_INSTANCE)
@Description("Cloud Spanner instance id. " +
"Uniquely identifies Cloud Spanner instance within your Google Cloud Platform project.")
@Macro
private String instance;
@Name(NAME_DATABASE)
@Description("Cloud Spanner database id. Uniquely identifies your database within the Cloud Spanner instance.")
@Macro
private String database;
@Name(NAME_KEYS)
@Nullable
@Description("Primary keys to be used to create spanner table, if the spanner table does not exist.")
@Macro
private String keys;
@Name(NAME_SCHEMA)
@Nullable
@Description("Schema of the Spanner table.")
@Macro
private String schema;
@Name(NAME_CMEK_KEY)
@Macro
@Nullable
@Description("The GCP customer managed encryption key (CMEK) name used to encrypt data written to " +
"any database created by the plugin. If the database already exists, this is ignored. More information can be " +
"found at https://cloud.google.com/data-fusion/docs/how-to/customer-managed-encryption-keys")
protected String cmekKey;
@Name(Constants.Reference.REFERENCE_NAME)
@Nullable
@Description("This will be used to uniquely identify this source for lineage, annotating metadata, etc.")
public String referenceName;
@Name(ConfigUtil.NAME_USE_CONNECTION)
@Nullable
@Description("Whether to use an existing connection.")
private Boolean useConnection;
@Name(ConfigUtil.NAME_CONNECTION)
@Macro
@Nullable
@Description("The existing connection to use.")
protected GCPConnectorConfig connection;
public SpannerSinkConfig(String referenceName, String table, @Nullable Integer batchSize, String instance,
String database, @Nullable String keys, String schema, @Nullable String cmekKey) {
this.referenceName = referenceName;
this.table = table;
this.batchSize = batchSize;
this.instance = instance;
this.database = database;
this.keys = keys;
this.schema = schema;
this.cmekKey = cmekKey;
}
/**
* Returns the reference name
*
* @return referenceName
*/
public String getReferenceName() {
return referenceName;
}
public String getTable() {
return table;
}
public String getInstance() {
return instance;
}
public String getDatabase() {
return database;
}
@Nullable
public String getKeys() {
return keys;
}
@Nullable
public GCPConnectorConfig getConnection() {
return connection;
}
public void validate(FailureCollector collector) {
validate(collector, Collections.emptyMap());
}
public void validate(FailureCollector collector, Map arguments) {
if (!Strings.isNullOrEmpty(referenceName)) {
IdUtils.validateReferenceName(referenceName, collector);
}
ConfigUtil.validateConnection(this, useConnection, connection, collector);
Schema schema = getSchema(collector);
if (!containsMacro(NAME_SCHEMA) && schema != null) {
// validate output schema
SpannerUtil.validateSchema(schema, SUPPORTED_TYPES, collector);
}
if (!containsMacro(NAME_BATCH_SIZE) && batchSize != null && batchSize < 1) {
collector.addFailure("Invalid spanner batch size for writes.", "Ensure the value is a positive number.")
.withConfigProperty(NAME_BATCH_SIZE);
}
if (!containsMacro(NAME_KEYS) && keys != null && !containsMacro(NAME_SCHEMA) && schema != null) {
String[] splitted = keys.split(",");
for (String key : splitted) {
if (schema.getField(key.trim()) == null) {
collector.addFailure(String.format("Primary key field '%s' does not exist in the schema.", key),
"Change the Primary key field to be one of the schema fields.")
.withConfigElement(NAME_KEYS, key);
}
}
}
if (!containsMacro(NAME_CMEK_KEY)) {
CmekUtils.getCmekKey(cmekKey, arguments, collector);
}
}
/**
* Returns schema object.
*/
@Nullable
public Schema getSchema(FailureCollector collector) {
if (Strings.isNullOrEmpty(schema)) {
return null;
}
try {
return Schema.parseJson(schema);
} catch (IOException e) {
collector.addFailure("Invalid schema: " + e.getMessage(), null).withConfigProperty(NAME_SCHEMA);
}
// if there was an error that was added, it will throw an exception, otherwise, this statement will not be executed
throw collector.getOrThrowException();
}
public int getBatchSize() {
return batchSize == null ? DEFAULT_SPANNER_WRITE_BATCH_SIZE : batchSize;
}
}