All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.cdap.plugin.batch.sink.KVTableSink Maven / Gradle / Ivy

There is a newer version: 2.12.3
Show newest version
/*
 * Copyright © 2015-2019 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package io.cdap.plugin.batch.sink;

import com.google.common.base.Preconditions;
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.api.annotation.Requirements;
import io.cdap.cdap.api.common.Bytes;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.api.dataset.lib.KeyValue;
import io.cdap.cdap.api.dataset.lib.KeyValueTable;
import io.cdap.cdap.etl.api.Emitter;
import io.cdap.cdap.etl.api.PipelineConfigurer;
import io.cdap.plugin.common.BatchReadableWritableConfig;
import io.cdap.plugin.common.Properties;
import io.cdap.plugin.common.SchemaValidator;

import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;

/**
 * CDAP KVTable Dataset Batch Sink.
 */
@Plugin(type = "batchsink")
@Name("KVTable")
@Description("Writes records to a KeyValueTable, using configurable fields from input records as the key and value.")
@Requirements(datasetTypes = KeyValueTable.TYPE)
public class KVTableSink extends BatchWritableSink {

  private static final String KEY_FIELD_DESC = "The name of the field to use as the key. Defaults to 'key'.";
  private static final String VALUE_FIELD_DESC = "The name of the field to use as the value. Defaults to 'value'.";

  /**
   * Config class for KVTableSink
   */
  public static class KVTableConfig extends BatchReadableWritableConfig {

    @Name(Properties.KeyValueTable.KEY_FIELD)
    @Description(KEY_FIELD_DESC)
    @Nullable
    private String keyField;

    @Name(Properties.KeyValueTable.VALUE_FIELD)
    @Description(VALUE_FIELD_DESC)
    @Nullable
    private String valueField;

    public KVTableConfig() {
      this(null, Properties.KeyValueTable.DEFAULT_KEY_FIELD, Properties.KeyValueTable.DEFAULT_VALUE_FIELD);
    }

    public KVTableConfig(String name, String keyField, String valueField) {
      super(name);
      this.keyField = keyField;
      this.valueField = valueField;
    }
  }

  private final KVTableConfig kvTableConfig;

  public KVTableSink(KVTableConfig kvTableConfig) {
    super(kvTableConfig);
    this.kvTableConfig = kvTableConfig;
  }

  @Override
  public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
    super.configurePipeline(pipelineConfigurer);
    Schema inputSchema = pipelineConfigurer.getStageConfigurer().getInputSchema();
    // validate that input and output fields are present
    if (inputSchema != null) {
      SchemaValidator.validateFieldsArePresentInSchema(inputSchema, kvTableConfig.keyField);
      SchemaValidator.validateFieldsArePresentInSchema(inputSchema, kvTableConfig.valueField);
      validateSchemaTypeIsStringOrBytes(inputSchema, kvTableConfig.keyField, false);
      validateSchemaTypeIsStringOrBytes(inputSchema, kvTableConfig.valueField, true);
    }
  }

  private void validateSchemaTypeIsStringOrBytes(Schema inputSchema, String fieldName, boolean isNullable) {
    Schema fieldSchema = inputSchema.getField(fieldName).getSchema();
    boolean fieldIsNullable = fieldSchema.isNullable();
    if (!isNullable && fieldIsNullable) {
      throw new IllegalArgumentException("Field " + fieldName + " cannot be nullable");
    }
    Schema.Type fieldType = fieldIsNullable ? fieldSchema.getNonNullable().getType() : fieldSchema.getType();

    if (fieldType != Schema.Type.STRING && fieldType != Schema.Type.BYTES) {
      throw new IllegalArgumentException(
        String.format("Field name %s is of type %s, only types String and Bytes are supported for KVTable",
                      fieldName, fieldType));
    }
  }

  @Override
  protected Map getProperties() {
    Map properties;
    properties = new HashMap<>(kvTableConfig.getProperties().getProperties());

    properties.put(Properties.BatchReadableWritable.NAME, kvTableConfig.getName());
    properties.put(Properties.BatchReadableWritable.TYPE, KeyValueTable.class.getName());
    return properties;
  }

  @Override
  public void transform(StructuredRecord input, Emitter> emitter) throws Exception {
    Object key = input.get(kvTableConfig.keyField);
    Preconditions.checkArgument(key != null, "Key cannot be null.");
    byte[] keyBytes;

    Schema keyFieldSchema = input.getSchema().getField(kvTableConfig.keyField).getSchema();
    if (keyFieldSchema.getType().equals(Schema.Type.STRING)) {
      keyBytes = Bytes.toBytes((String) key);
    } else if (keyFieldSchema.getType().equals(Schema.Type.BYTES)) {
      keyBytes = key instanceof ByteBuffer ? Bytes.toBytes((ByteBuffer) key) : (byte[]) key;
    } else if (keyFieldSchema.isNullable()) {
      throw new Exception(
        String.format("Key field %s cannot have nullable schema %s", kvTableConfig.keyField, keyFieldSchema));
    } else {
      throw new Exception(
        String.format("Key field %s cannot have schema %s. It must of either String or Bytes",
                      kvTableConfig.keyField, keyFieldSchema));
    }

    Schema.Field valueFieldSchema = input.getSchema().getField(kvTableConfig.valueField);
    if (valueFieldSchema == null) {
      throw new Exception("Value Field " + kvTableConfig.valueField + " is missing in the input record");
    }

    byte[] valBytes = null;
    Object val = input.get(kvTableConfig.valueField);
    if (val != null) {
      Schema.Type valueFieldType =
        valueFieldSchema.getSchema().isNullable() ? valueFieldSchema.getSchema().getNonNullable().getType() :
          valueFieldSchema.getSchema().getType();
      if (valueFieldType.equals(Schema.Type.STRING)) {
        valBytes = Bytes.toBytes((String) val);
      } else if (valueFieldType.equals(Schema.Type.BYTES)) {
        valBytes = val instanceof ByteBuffer ? Bytes.toBytes((ByteBuffer) val) : (byte[]) val;
      } else {
        throw new Exception(
          String.format("Value field %s cannot have schema %s. It must of either String or Bytes",
                        kvTableConfig.valueField, valueFieldSchema));
      }
    }
    emitter.emit(new KeyValue<>(keyBytes, valBytes));
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy