com.clickzetta.platform.common.SchemaConvert Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of clickzetta-java Show documentation
Show all versions of clickzetta-java Show documentation
The Java SDK for clickzetta's Lakehouse
package com.clickzetta.platform.common;
import com.google.common.base.Preconditions;
import cz.proto.*;
import cz.proto.ingestion.Ingestion.IGSTableType;
import org.apache.kudu.KuduCommon;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple3;
import java.util.*;
public abstract class SchemaConvert {
private static final Logger LOG = LoggerFactory.getLogger(SchemaConvert.class);

/**
 * Registry of the virtual key columns, keyed by virtual column name.
 *
 * <p>Each value is a triple: {@code _1} is the special column type whose {@code value} is written into
 * the PB type attributes, {@code _2} is the {@link Type} used by {@code buildVirtualColumnSchema}, and
 * {@code _3} is the kudu data type used by {@code buildVirtualColumnSchemaPB}.
 *
 * <p>NOTE(review): the key type is taken to be {@code String} because the map is queried with column
 * names; confirm against the declaration of the {@code Constant.VIRTUAL_*_KEY} constants.
 */
public static Map<String, Tuple3<ColumnTypeAttributes.ColumnSpecialType, Type, KuduCommon.DataType>> virtualMap =
        new HashMap<>();

// Populated in a static initializer rather than with double-brace initialization, which would create
// an extra anonymous HashMap subclass just to run three put() calls.
static {
    virtualMap.put(Constant.VIRTUAL_BUCKET_KEY,
            new Tuple3<>(ColumnTypeAttributes.ColumnSpecialType.CZ_HIDDEN_BUCKET_ID, Type.INT32, KuduCommon.DataType.UINT32));
    virtualMap.put(Constant.VIRTUAL_CLUSTER_KEY,
            new Tuple3<>(ColumnTypeAttributes.ColumnSpecialType.CZ_VIRTUAL_CLUSTER_KEY, Type.INT64, KuduCommon.DataType.INT64));
    virtualMap.put(Constant.VIRTUAL_NORMAL_KEY,
            new Tuple3<>(ColumnTypeAttributes.ColumnSpecialType.CZ_VIRTUAL_NORMAL_KEY, Type.INT64, KuduCommon.DataType.INT64));
}
/**
 * Creates the schema converter that matches the given table type.
 *
 * @param igsTableType table type to pick a converter for
 * @return a new converter instance for {@code NORMAL}, {@code CLUSTER} or {@code ACID} tables
 * @throws UnsupportedOperationException if the table type is none of the above
 */
public static SchemaConvert getInstance(IGSTableType igsTableType) {
    final SchemaConvert converter;
    switch (igsTableType) {
        case NORMAL:
            converter = new CommonSchemaConvert();
            break;
        case CLUSTER:
            converter = new ClusterSchemaConvert();
            break;
        case ACID:
            converter = new AcidSchemaConvert();
            break;
        default:
            throw new UnsupportedOperationException("not support schema convert with table type: " + igsTableType);
    }
    return converter;
}
/**
 * Builds a schemaPb formatted like the server side kudu schemaPb. Used for the in-memory tablet schema
 * double check.
 *
 * <p>Column ids are assigned sequentially from 0 in the order of {@code schema.getColumns()}. A column
 * whose name is registered in {@code virtualMap} is emitted as a virtual key column; every other column
 * is converted from the table field of the same name.
 *
 * @param meta   metadata entity whose table schema supplies the field definitions
 * @param schema schema whose columns decide which columns are emitted and in which order
 * @return the assembled schemaPb
 * @throws NullPointerException if a non-virtual column has no field of the same name in {@code meta}
 */
public KuduCommon.SchemaPB convertToExternalSchemaPB(MetadataEntity.Entity meta, Schema schema) {
    Map<String, FieldSchema> fieldsByName = new HashMap<>();
    for (FieldSchema field : meta.getTable().getTableSchema().getFieldsList()) {
        fieldsByName.put(field.getName(), field);
    }
    List<KuduCommon.ColumnSchemaPB> columns = new ArrayList<>();
    int columnId = 0;
    for (ColumnSchema columnSchema : schema.getColumns()) {
        String columnName = columnSchema.getName();
        if (virtualMap.containsKey(columnName)) {
            columns.add(buildVirtualColumnSchemaPB(columnId, columnName));
        } else {
            // Fail here with the column name instead of an anonymous NPE deep inside the column builder.
            FieldSchema field = Preconditions.checkNotNull(fieldsByName.get(columnName),
                    "column [%s] is not defined in the table schema.", columnName);
            columns.add(buildColumnSchemaPB(field, columnSchema.isKey(), columnId));
        }
        columnId++;
    }
    return KuduCommon.SchemaPB.newBuilder()
            .addAllColumns(columns)
            .build();
}
/**
 * Builds a key schemaPb formatted like the server side kudu key schemaPb. Used for the in-memory tablet
 * schema double check.
 *
 * <p>Delegates to {@code convertToExternalSchemaPB} with the key schema.
 *
 * @param meta      metadata entity whose table schema supplies the field definitions
 * @param keySchema schema holding the key columns
 * @return the assembled key schemaPb
 */
public KuduCommon.SchemaPB convertToExternalKeySchemaPB(MetadataEntity.Entity meta, Schema keySchema) {
    final KuduCommon.SchemaPB keySchemaPB = convertToExternalSchemaPB(meta, keySchema);
    return keySchemaPB;
}
/**
 * Builds the column schemaPb of a virtual key column.
 *
 * <p>The column is always a non-nullable key; its name and comment are both the virtual key name, and its
 * data type and special column code are looked up in {@code virtualMap}.
 *
 * @param beginId        id to assign to the column
 * @param virtualKeyType name of the virtual key column, expected to be a key of {@code virtualMap}
 * @return the column schemaPb
 */
protected KuduCommon.ColumnSchemaPB buildVirtualColumnSchemaPB(int beginId, String virtualKeyType) {
    KuduCommon.ColumnSchemaPB.Builder column = KuduCommon.ColumnSchemaPB.newBuilder();
    column.setId(beginId);
    column.setName(virtualKeyType);
    column.setIsKey(true);
    column.setIsNullable(false);
    column.setComment(virtualKeyType);
    column.setType(virtualMap.get(virtualKeyType)._3());

    KuduCommon.ColumnTypeAttributesPB.Builder attributes = KuduCommon.ColumnTypeAttributesPB.newBuilder();
    attributes.setSpecialColCode(virtualMap.get(virtualKeyType)._1().value);
    column.setTypeAttributes(attributes.build());

    return column.build();
}
/**
 * Builds the column schemaPb of an ordinary (non special) column.
 *
 * @param field   table field to convert
 * @param isKey   whether the column is a key column
 * @param beginId id to assign to the column
 * @return the column schemaPb, tagged with {@code CZ_ORDINARY_COLUMN}
 */
protected KuduCommon.ColumnSchemaPB buildColumnSchemaPB(FieldSchema field, boolean isKey, int beginId) {
    final ColumnTypeAttributes.ColumnSpecialType ordinary =
            ColumnTypeAttributes.ColumnSpecialType.CZ_ORDINARY_COLUMN;
    return buildColumnSchemaPB(field, isKey, beginId, ordinary);
}
/**
 * Builds the kudu column schemaPb for one table field.
 *
 * <p>DECIMAL maps to DECIMAL32 / DECIMAL64 / DECIMAL128 depending on precision (up to 9, up to 18,
 * larger) and carries precision and scale. CHAR and VARCHAR both map to kudu VARCHAR and carry their
 * length. Every column carries the special column code of {@code columnSpecialType}.
 *
 * @param field             table field to convert
 * @param isKey             whether the column is a key column; a key column must not be nullable
 * @param beginId           id to assign to the column
 * @param columnSpecialType special column type whose code is stored in the type attributes
 * @return the column schemaPb
 * @throws IllegalArgumentException      if a key field is nullable, or a DECIMAL field has no decimal
 *                                       type info
 * @throws UnsupportedOperationException if the field's data type category has no kudu data type
 * @throws ArithmeticException           if a declared precision, scale or length does not fit an int
 */
protected KuduCommon.ColumnSchemaPB buildColumnSchemaPB(FieldSchema field, boolean isKey, int beginId,
        ColumnTypeAttributes.ColumnSpecialType columnSpecialType) {
    DataType fieldType = field.getType();
    if (isKey) {
        Preconditions.checkArgument(!fieldType.getNullable(),
                "fieldName [%s] can not be nullable if it is clusterKey sortKey primaryKey.", field.getName());
    }

    // Every column carries the special column code; type specific attributes are added in the switch.
    KuduCommon.ColumnTypeAttributesPB.Builder attributes = KuduCommon.ColumnTypeAttributesPB.newBuilder()
            .setSpecialColCode(columnSpecialType.value);
    final KuduCommon.DataType type;
    switch (fieldType.getCategoryValue()) {
        case DataTypeCategory.INT8_VALUE:
            type = KuduCommon.DataType.INT8;
            break;
        case DataTypeCategory.INT16_VALUE:
            type = KuduCommon.DataType.INT16;
            break;
        case DataTypeCategory.INT32_VALUE:
            type = KuduCommon.DataType.INT32;
            break;
        case DataTypeCategory.INT64_VALUE:
            type = KuduCommon.DataType.INT64;
            break;
        case DataTypeCategory.FLOAT32_VALUE:
            type = KuduCommon.DataType.FLOAT;
            break;
        case DataTypeCategory.FLOAT64_VALUE:
            type = KuduCommon.DataType.DOUBLE;
            break;
        case DataTypeCategory.DECIMAL_VALUE: {
            // Without type info no decimal width can be chosen; previously this ended in setType(null).
            Preconditions.checkArgument(fieldType.hasDecimalTypeInfo(),
                    "fieldName [%s] is DECIMAL but has no decimal type info.", field.getName());
            int precision = Math.toIntExact(fieldType.getDecimalTypeInfo().getPrecision());
            int scale = Math.toIntExact(fieldType.getDecimalTypeInfo().getScale());
            if (precision <= 9) {
                type = KuduCommon.DataType.DECIMAL32;
            } else if (precision <= 18) {
                type = KuduCommon.DataType.DECIMAL64;
            } else {
                type = KuduCommon.DataType.DECIMAL128;
            }
            attributes.setPrecision(precision).setScale(scale);
            break;
        }
        case DataTypeCategory.BOOLEAN_VALUE:
            type = KuduCommon.DataType.BOOL;
            break;
        case DataTypeCategory.CHAR_VALUE:
            // CHAR is represented as kudu VARCHAR with the CHAR length.
            type = KuduCommon.DataType.VARCHAR;
            attributes.setLength(Math.toIntExact(fieldType.getCharTypeInfo().getLength()));
            break;
        case DataTypeCategory.VARCHAR_VALUE:
            type = KuduCommon.DataType.VARCHAR;
            attributes.setLength(Math.toIntExact(fieldType.getVarCharTypeInfo().getLength()));
            break;
        case DataTypeCategory.STRING_VALUE:
            type = KuduCommon.DataType.STRING;
            break;
        case DataTypeCategory.BINARY_VALUE:
            type = KuduCommon.DataType.BINARY;
            break;
        case DataTypeCategory.DATE_VALUE:
            type = KuduCommon.DataType.DATE;
            break;
        case DataTypeCategory.TIMESTAMP_LTZ_VALUE:
            type = KuduCommon.DataType.UNIXTIME_MICROS;
            break;
        default:
            // Fail explicitly: continuing with a null type only surfaced as an NPE inside the PB builder.
            LOG.error("DataType:{} is not supported yet.", fieldType.toString());
            throw new UnsupportedOperationException("not support dataType: " + fieldType);
    }

    return KuduCommon.ColumnSchemaPB.newBuilder()
            .setId(beginId)
            .setName(field.getName())
            .setIsKey(isKey)
            .setIsNullable(!isKey && fieldType.getNullable())
            .setComment(field.getComment())
            .setType(type)
            .setTypeAttributes(attributes.build())
            .build();
}
/**
 * Builds the client side schema, which is only used for row operation encoding.
 *
 * @param meta metadata entity to derive the schema from
 * @return the client side schema
 */
public abstract Schema convertToExternalSchema(MetadataEntity.Entity meta);
/**
 * Builds the client side key schema, which is only used for key row operation encoding.
 *
 * @param meta metadata entity to derive the key schema from
 * @return the client side key schema
 */
public abstract Schema convertToExternalKeySchema(MetadataEntity.Entity meta);
/**
 * Builds the client side column schema of a virtual key column.
 *
 * <p>The column is a non-nullable key whose type is looked up in {@code virtualMap}.
 *
 * @param virtualKeyType name of the virtual key column, expected to be a key of {@code virtualMap}
 * @return the column schema
 */
protected ColumnSchema buildVirtualColumnSchema(String virtualKeyType) {
    final Type virtualType = virtualMap.get(virtualKeyType)._2();
    final ColumnSchema.ColumnSchemaBuilder builder =
            new ColumnSchema.ColumnSchemaBuilder(virtualKeyType, virtualType);
    return builder.key(true).nullable(false).build();
}
/**
 * Converts a table field into a client side column schema.
 *
 * <p>CHAR and VARCHAR columns carry their length as type attributes, DECIMAL columns carry precision and
 * scale, and all other categories carry no type attributes. Nullability is taken from the field type.
 *
 * @param fieldSchema table field to convert
 * @param isKey       whether the column is a key column
 * @return the column schema
 * @throws UnsupportedOperationException if the field's data type category has no external type
 * @throws ArithmeticException           if a declared length, precision or scale does not fit an int
 */
protected ColumnSchema buildColumnSchema(FieldSchema fieldSchema, boolean isKey) {
    DataType dataType = fieldSchema.getType();
    ColumnSchema.ColumnSchemaBuilder builder =
            new ColumnSchema.ColumnSchemaBuilder(fieldSchema.getName(), convertToExternalType(dataType));
    switch (dataType.getCategoryValue()) {
        case DataTypeCategory.CHAR_VALUE:
            builder = builder.typeAttributes(new ColumnTypeAttributes.ColumnTypeAttributesBuilder()
                    .length(Math.toIntExact(dataType.getCharTypeInfo().getLength()))
                    .build());
            break;
        case DataTypeCategory.VARCHAR_VALUE:
            builder = builder.typeAttributes(new ColumnTypeAttributes.ColumnTypeAttributesBuilder()
                    .length(Math.toIntExact(dataType.getVarCharTypeInfo().getLength()))
                    .build());
            break;
        case DataTypeCategory.DECIMAL_VALUE:
            builder = builder.typeAttributes(new ColumnTypeAttributes.ColumnTypeAttributesBuilder()
                    .precision(Math.toIntExact(dataType.getDecimalTypeInfo().getPrecision()))
                    .scale(Math.toIntExact(dataType.getDecimalTypeInfo().getScale()))
                    .build());
            break;
        default:
            // No type attributes for the remaining categories.
            break;
    }
    return builder.nullable(dataType.getNullable()).key(isKey).build();
}
/**
 * Maps a table data type to the client side column type, based on its category.
 * CHAR and VARCHAR both map to {@code Type.VARCHAR}.
 *
 * @param type table data type
 * @return the matching client side type
 * @throws UnsupportedOperationException if the category has no client side type
 */
private Type convertToExternalType(DataType type) {
    final Type externalType;
    switch (type.getCategoryValue()) {
        case DataTypeCategory.INT8_VALUE:
            externalType = Type.INT8;
            break;
        case DataTypeCategory.INT16_VALUE:
            externalType = Type.INT16;
            break;
        case DataTypeCategory.INT32_VALUE:
            externalType = Type.INT32;
            break;
        case DataTypeCategory.INT64_VALUE:
            externalType = Type.INT64;
            break;
        case DataTypeCategory.FLOAT32_VALUE:
            externalType = Type.FLOAT;
            break;
        case DataTypeCategory.FLOAT64_VALUE:
            externalType = Type.DOUBLE;
            break;
        case DataTypeCategory.DECIMAL_VALUE:
            externalType = Type.DECIMAL;
            break;
        case DataTypeCategory.BOOLEAN_VALUE:
            externalType = Type.BOOL;
            break;
        case DataTypeCategory.VARCHAR_VALUE:
        case DataTypeCategory.CHAR_VALUE:
            externalType = Type.VARCHAR;
            break;
        case DataTypeCategory.STRING_VALUE:
            externalType = Type.STRING;
            break;
        case DataTypeCategory.BINARY_VALUE:
            externalType = Type.BINARY;
            break;
        case DataTypeCategory.DATE_VALUE:
            externalType = Type.DATE;
            break;
        case DataTypeCategory.TIMESTAMP_LTZ_VALUE:
            externalType = Type.UNIXTIME_MICROS;
            break;
        default:
            throw new UnsupportedOperationException("not support dataType: " + type);
    }
    return externalType;
}
/**
 * Builds the user-facing schema used to create row objects for writing data.
 * It returns the pure table schema without adding any extra virtual columns: fields flagged as hidden
 * or virtual are skipped, and every remaining field becomes a non-key column.
 *
 * @param meta metadata entity whose table schema supplies the fields
 * @return a schema holding only the visible, non-virtual columns
 */
public Schema convertToPureExternalSchema(MetadataEntity.Entity meta) {
    List<FieldSchema> fields = meta.getTable().getTableSchema().getFieldsList();
    List<ColumnSchema> columns = new ArrayList<>(fields.size());
    for (FieldSchema field : fields) {
        boolean hidden = field.hasHidden() && field.getHidden();
        boolean virtual = field.hasVirtual() && field.getVirtual();
        if (hidden || virtual) {
            continue;
        }
        columns.add(buildColumnSchema(field, false));
    }
    return new Schema(columns, Collections.emptyList(), Collections.emptyList());
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy