/*
 * All downloads are free. Search and download functionality uses the official Maven repository.
 *
 * com.clickzetta.platform.common.SchemaConvert (Maven / Gradle / Ivy)
 *
 * There is a newer version: 2.0.0
 * Show newest version
 */
package com.clickzetta.platform.common;

import com.google.common.base.Preconditions;
import cz.proto.*;
import cz.proto.ingestion.Ingestion.IGSTableType;
import org.apache.kudu.KuduCommon;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple3;

import java.util.*;

public abstract class SchemaConvert {

    private static final Logger LOG = LoggerFactory.getLogger(SchemaConvert.class);

    public static Map> virtualMap =
        new HashMap>() {{
            put(Constant.VIRTUAL_BUCKET_KEY,
                new Tuple3<>(ColumnTypeAttributes.ColumnSpecialType.CZ_HIDDEN_BUCKET_ID, Type.INT32, KuduCommon.DataType.UINT32));
            put(Constant.VIRTUAL_CLUSTER_KEY,
                new Tuple3<>(ColumnTypeAttributes.ColumnSpecialType.CZ_VIRTUAL_CLUSTER_KEY, Type.INT64, KuduCommon.DataType.INT64));
            put(Constant.VIRTUAL_NORMAL_KEY,
                new Tuple3<>(ColumnTypeAttributes.ColumnSpecialType.CZ_VIRTUAL_NORMAL_KEY, Type.INT64, KuduCommon.DataType.INT64));
        }};

    public static SchemaConvert getInstance(IGSTableType igsTableType) {
        switch (igsTableType) {
            case NORMAL:
                return new CommonSchemaConvert();
            case CLUSTER:
                return new ClusterSchemaConvert();
            case ACID:
                return new AcidSchemaConvert();
            default:
                throw new UnsupportedOperationException("not support schema convert with table type: " + igsTableType);
        }
    }

    /**
     * format like server side kudu schemaPb. used for in memory tablet schema double check.
     *
     * @param meta
     * @param schema
     * @return
     */
    public KuduCommon.SchemaPB convertToExternalSchemaPB(MetadataEntity.Entity meta, Schema schema) {
        Map fieldNameMap = new HashMap<>();
        for (FieldSchema field : meta.getTable().getTableSchema().getFieldsList()) {
            fieldNameMap.put(field.getName(), field);
        }

        List columns = new ArrayList<>();
        int beginId = 0;
        for (ColumnSchema columnSchema : schema.getColumns()) {
            if (virtualMap.containsKey(columnSchema.getName())) {
                // build bucket column first.
                columns.add(buildVirtualColumnSchemaPB(beginId, columnSchema.getName()));
            } else {
                columns.add(buildColumnSchemaPB(fieldNameMap.get(columnSchema.getName()), columnSchema.isKey(), beginId));
            }
            beginId++;
        }

        return KuduCommon.SchemaPB.newBuilder()
            .addAllColumns(columns)
            .build();
    }

    /**
     * format like server side kudu key schemaPb. used for in memory tablet schema double check.
     *
     * @param meta
     * @param keySchema
     * @return
     */
    public KuduCommon.SchemaPB convertToExternalKeySchemaPB(MetadataEntity.Entity meta, Schema keySchema) {
        return convertToExternalSchemaPB(meta, keySchema);
    }

    protected KuduCommon.ColumnSchemaPB buildVirtualColumnSchemaPB(int beginId, String virtualKeyType) {
        return KuduCommon.ColumnSchemaPB.newBuilder()
            .setId(beginId)
            .setName(virtualKeyType)
            .setIsKey(true)
            .setIsNullable(false)
            .setComment(virtualKeyType)
            .setType(virtualMap.get(virtualKeyType)._3())
            .setTypeAttributes(KuduCommon.ColumnTypeAttributesPB.newBuilder()
                .setSpecialColCode(virtualMap.get(virtualKeyType)._1().value).build())
            .build();
    }

    protected KuduCommon.ColumnSchemaPB buildColumnSchemaPB(FieldSchema field, boolean isKey, int beginId) {
        return buildColumnSchemaPB(field, isKey, beginId, ColumnTypeAttributes.ColumnSpecialType.CZ_ORDINARY_COLUMN);
    }

    protected KuduCommon.ColumnSchemaPB buildColumnSchemaPB(FieldSchema field, boolean isKey, int beginId,
                                                            ColumnTypeAttributes.ColumnSpecialType columnSpecialType) {
        KuduCommon.ColumnSchemaPB columnSchemaPB = null;
        KuduCommon.DataType type = null;
        if (isKey) {
            Preconditions.checkArgument(!field.getType().getNullable(),
                "fieldName [%s] can not be nullable if it is clusterKey sortKey primaryKey.", field.getName());
        }
        switch (field.getType().getCategoryValue()) {
            case DataTypeCategory.INT8_VALUE:
                type = KuduCommon.DataType.INT8;
                break;
            case DataTypeCategory.INT16_VALUE:
                type = KuduCommon.DataType.INT16;
                break;
            case DataTypeCategory.INT32_VALUE:
                type = KuduCommon.DataType.INT32;
                break;
            case DataTypeCategory.INT64_VALUE:
                type = KuduCommon.DataType.INT64;
                break;
            case DataTypeCategory.FLOAT32_VALUE:
                type = KuduCommon.DataType.FLOAT;
                break;
            case DataTypeCategory.FLOAT64_VALUE:
                type = KuduCommon.DataType.DOUBLE;
                break;
            case DataTypeCategory.DECIMAL_VALUE: {
                if (field.getType().hasDecimalTypeInfo()) {
                    if (field.getType().getDecimalTypeInfo().getPrecision() <= 9) {
                        type = KuduCommon.DataType.DECIMAL32;
                    } else if (field.getType().getDecimalTypeInfo().getPrecision() <= 18) {
                        type = KuduCommon.DataType.DECIMAL64;
                    } else {
                        type = KuduCommon.DataType.DECIMAL128;
                    }
                }
                KuduCommon.ColumnTypeAttributesPB attributes = KuduCommon.ColumnTypeAttributesPB.newBuilder()
                    .setPrecision(Integer.valueOf(String.valueOf(field.getType().getDecimalTypeInfo().getPrecision())))
                    .setScale(Integer.valueOf(String.valueOf(field.getType().getDecimalTypeInfo().getScale())))
                    .setSpecialColCode(columnSpecialType.value)
                    .build();

                columnSchemaPB = KuduCommon.ColumnSchemaPB.newBuilder()
                    .setId(beginId)
                    .setName(field.getName())
                    .setIsKey(isKey)
                    .setIsNullable(!isKey && field.getType().getNullable())
                    .setComment(field.getComment())
                    .setType(type)
                    .setTypeAttributes(attributes)
                    .build();
                break;
            }
            case DataTypeCategory.BOOLEAN_VALUE:
                type = KuduCommon.DataType.BOOL;
                break;
            case DataTypeCategory.CHAR_VALUE:
            case DataTypeCategory.VARCHAR_VALUE:
            {
                KuduCommon.ColumnTypeAttributesPB attributesPB = null;
                if (field.getType().getCategory() == DataTypeCategory.CHAR) {
                    attributesPB = KuduCommon.ColumnTypeAttributesPB.newBuilder()
                        .setLength(Integer.valueOf(String.valueOf(field.getType().getCharTypeInfo().getLength())))
                        .setSpecialColCode(columnSpecialType.value)
                        .build();
                } else {
                    attributesPB = KuduCommon.ColumnTypeAttributesPB.newBuilder()
                        .setLength(Integer.valueOf(String.valueOf(field.getType().getVarCharTypeInfo().getLength())))
                        .setSpecialColCode(columnSpecialType.value)
                        .build();
                }

                columnSchemaPB = KuduCommon.ColumnSchemaPB.newBuilder()
                    .setId(beginId)
                    .setName(field.getName())
                    .setIsKey(isKey)
                    .setIsNullable(!isKey && field.getType().getNullable())
                    .setComment(field.getComment())
                    .setType(KuduCommon.DataType.VARCHAR)
                    .setTypeAttributes(attributesPB)
                    .build();
                break;
            }
            case DataTypeCategory.STRING_VALUE:
                type = KuduCommon.DataType.STRING;
                break;
            case DataTypeCategory.BINARY_VALUE:
                type = KuduCommon.DataType.BINARY;
                break;
            case DataTypeCategory.DATE_VALUE:
                type = KuduCommon.DataType.DATE;
                break;
            case DataTypeCategory.TIMESTAMP_LTZ_VALUE:
                type = KuduCommon.DataType.UNIXTIME_MICROS;
                break;
            default:
                LOG.error("DataType:{} is not supported yet.", field.getType().toString());

        }
        if (field.getType().getCategory() == DataTypeCategory.DECIMAL ||
            field.getType().getCategory() == DataTypeCategory.CHAR ||
            field.getType().getCategory() == DataTypeCategory.VARCHAR) {
        } else {
            columnSchemaPB = KuduCommon.ColumnSchemaPB.newBuilder()
                .setId(beginId)
                .setName(field.getName())
                .setIsKey(isKey)
                .setIsNullable(!isKey && field.getType().getNullable())
                .setComment(field.getComment())
                .setType(type)
                .setTypeAttributes(KuduCommon.ColumnTypeAttributesPB
                    .newBuilder().setSpecialColCode(columnSpecialType.value).build())
                .build();
        }
        return columnSchemaPB;
    }

    /**
     * client side schema. only used for row operation encode.
     *
     * @param meta
     * @return
     */
    public abstract Schema convertToExternalSchema(MetadataEntity.Entity meta);

    /**
     * client side key schema. only used for key row operation encode.
     *
     * @param meta
     * @return
     */
    public abstract Schema convertToExternalKeySchema(MetadataEntity.Entity meta);

    protected ColumnSchema buildVirtualColumnSchema(String virtualKeyType) {
        return new ColumnSchema.ColumnSchemaBuilder(virtualKeyType, virtualMap.get(virtualKeyType)._2())
            .key(true).nullable(false).build();
    }

    protected ColumnSchema buildColumnSchema(FieldSchema fieldSchema, boolean isKey) {
        ColumnSchema columnSchema = null;
        boolean isNullable = fieldSchema.getType().getNullable();
        switch (fieldSchema.getType().getCategoryValue()) {
            case DataTypeCategory.CHAR_VALUE:
                columnSchema = new ColumnSchema.ColumnSchemaBuilder(fieldSchema.getName(),
                    convertToExternalType(fieldSchema.getType())).typeAttributes(
                    new ColumnTypeAttributes.ColumnTypeAttributesBuilder().length(
                        Integer.valueOf(String.valueOf(fieldSchema.getType().getCharTypeInfo().getLength()))
                    ).build()).nullable(isNullable).key(isKey).build();
                break;
            case DataTypeCategory.VARCHAR_VALUE:
                columnSchema = new ColumnSchema.ColumnSchemaBuilder(fieldSchema.getName(),
                    convertToExternalType(fieldSchema.getType())).typeAttributes(
                    new ColumnTypeAttributes.ColumnTypeAttributesBuilder().length(
                        Integer.valueOf(String.valueOf(fieldSchema.getType().getVarCharTypeInfo().getLength()))
                    ).build()).nullable(isNullable).key(isKey).build();
                break;
            case DataTypeCategory.DECIMAL_VALUE:
                columnSchema = new ColumnSchema.ColumnSchemaBuilder(fieldSchema.getName(),
                    convertToExternalType(fieldSchema.getType())).typeAttributes(
                    new ColumnTypeAttributes.ColumnTypeAttributesBuilder().precision(
                        Integer.valueOf(String.valueOf(fieldSchema.getType().getDecimalTypeInfo().getPrecision()))
                    ).scale(
                        Integer.valueOf(String.valueOf(fieldSchema.getType().getDecimalTypeInfo().getScale()))
                    ).build()).nullable(isNullable).key(isKey).build();
                break;
            default:
                columnSchema = new ColumnSchema.ColumnSchemaBuilder(fieldSchema.getName(),
                    convertToExternalType(fieldSchema.getType())).nullable(isNullable).key(isKey).build();
        }
        return columnSchema;
    }

    private Type convertToExternalType(DataType type) {
        switch (type.getCategoryValue()) {
            case DataTypeCategory.INT8_VALUE:
                return Type.INT8;
            case DataTypeCategory.INT16_VALUE:
                return Type.INT16;
            case DataTypeCategory.INT32_VALUE:
                return Type.INT32;
            case DataTypeCategory.INT64_VALUE:
                return Type.INT64;
            case DataTypeCategory.FLOAT32_VALUE:
                return Type.FLOAT;
            case DataTypeCategory.FLOAT64_VALUE:
                return Type.DOUBLE;
            case DataTypeCategory.DECIMAL_VALUE:
                return Type.DECIMAL;
            case DataTypeCategory.BOOLEAN_VALUE:
                return Type.BOOL;
            case DataTypeCategory.VARCHAR_VALUE:
            case DataTypeCategory.CHAR_VALUE:
                return Type.VARCHAR;
            case DataTypeCategory.STRING_VALUE:
                return Type.STRING;
            case DataTypeCategory.BINARY_VALUE:
                return Type.BINARY;
            case DataTypeCategory.DATE_VALUE:
                return Type.DATE;
            case DataTypeCategory.TIMESTAMP_LTZ_VALUE:
                return Type.UNIXTIME_MICROS;
            default:
                throw new UnsupportedOperationException("not support dataType: " + type);
        }
    }

    /**
     * User-facing schema to create row object for writing data.
     * It returns pure table schema without adding any extra virtual columns.
     *
     * @param meta
     * @return
     */
    public Schema convertToPureExternalSchema(MetadataEntity.Entity meta) {
        List fields = meta.getTable().getTableSchema().getFieldsList();
        List columns = new ArrayList<>(fields.size());
        for (FieldSchema field : fields) {
            if (field.hasHidden() && field.getHidden()) {
                continue;
            }
            if (field.hasVirtual() && field.getVirtual()) {
                continue;
            }
            columns.add(buildColumnSchema(field, false));
        }
        return new Schema(columns, Collections.emptyList(), Collections.emptyList());
    }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy