com.clickzetta.platform.bulkload.SchemaConverter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of clickzetta-java Show documentation
The java SDK for clickzetta's Lakehouse
package com.clickzetta.platform.bulkload;
import cz.proto.ArrayTypeInfo;
import cz.proto.DataType;
import cz.proto.DecimalTypeInfo;
import cz.proto.MapTypeInfo;
import cz.proto.StructTypeInfo;
import cz.proto.ingestion.v2.IngestionV2;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types.BinaryType;
import org.apache.iceberg.types.Types.BooleanType;
import org.apache.iceberg.types.Types.DateType;
import org.apache.iceberg.types.Types.DecimalType;
import org.apache.iceberg.types.Types.DoubleType;
import org.apache.iceberg.types.Types.FloatType;
import org.apache.iceberg.types.Types.IntegerType;
import org.apache.iceberg.types.Types.ListType;
import org.apache.iceberg.types.Types.LongType;
import org.apache.iceberg.types.Types.MapType;
import org.apache.iceberg.types.Types.NestedField;
import org.apache.iceberg.types.Types.StringType;
import org.apache.iceberg.types.Types.StructType;
import org.apache.iceberg.types.Types.TimestampType;
import java.util.ArrayList;
import java.util.List;
/**
* Utility class to convert schema from {@link IngestionV2.StreamSchema} to {@link org.apache.iceberg.Schema}.
*/
final class SchemaConverter {

  /** Non-instantiable utility class. */
  private SchemaConverter() {
  }

  /**
   * Converts an ingestion {@link IngestionV2.StreamSchema} into an Iceberg {@link Schema}.
   *
   * @param tableSchema the stream schema whose data fields are converted in order
   * @return an Iceberg schema containing one {@link NestedField} per data field
   * @throws IllegalArgumentException if any field has an unsupported or malformed type
   */
  public static Schema convertToIcebergSchema(IngestionV2.StreamSchema tableSchema) {
    List<NestedField> fields = new ArrayList<>(tableSchema.getDataFieldsCount());
    for (int i = 0; i < tableSchema.getDataFieldsCount(); ++i) {
      DataType columnDataType = tableSchema.getDataFields(i).getType();
      String columnName = tableSchema.getDataFields(i).getName();
      fields.add(convertToIcebergField(columnDataType, columnName));
    }
    return new Schema(fields);
  }

  /**
   * Converts a single proto {@link DataType} into an Iceberg {@link NestedField},
   * recursing into element/key/value/struct-member types for nested categories.
   *
   * @param columnDataType the proto type carrying category, nullability, and field id
   * @param columnName the name to assign to the resulting field
   * @return the equivalent Iceberg field
   * @throws IllegalArgumentException if the category is unsupported, required type info
   *     (decimal/timestamp/array/map/struct) is missing, or the timestamp unit is not
   *     microseconds (the only unit Iceberg timestamps support here)
   */
  private static NestedField convertToIcebergField(DataType columnDataType, String columnName) {
    boolean nullable = columnDataType.getNullable();
    int fieldId = columnDataType.getFieldId();
    switch (columnDataType.getCategory()) {
      // INT8/INT16 are widened to Iceberg's 32-bit integer type.
      case INT8:
      case INT16:
      case INT32:
        return NestedField.of(fieldId, nullable, columnName, IntegerType.get());
      case INT64:
        return NestedField.of(fieldId, nullable, columnName, LongType.get());
      case FLOAT32:
        return NestedField.of(fieldId, nullable, columnName, FloatType.get());
      case FLOAT64:
        return NestedField.of(fieldId, nullable, columnName, DoubleType.get());
      case BOOLEAN:
        return NestedField.of(fieldId, nullable, columnName, BooleanType.get());
      case DECIMAL: {
        if (!columnDataType.hasDecimalTypeInfo()) {
          throw new IllegalArgumentException("Invalid decimal type: " + columnDataType);
        }
        DecimalTypeInfo decimalTypeInfo = columnDataType.getDecimalTypeInfo();
        return NestedField.of(fieldId, nullable, columnName,
            DecimalType.of((int) decimalTypeInfo.getPrecision(), (int) decimalTypeInfo.getScale()));
      }
      // CHAR/VARCHAR/JSON all map onto Iceberg's single string type.
      case CHAR:
      case VARCHAR:
      case STRING:
      case JSON:
        return NestedField.of(fieldId, nullable, columnName, StringType.get());
      case BINARY:
        return NestedField.of(fieldId, nullable, columnName, BinaryType.get());
      case DATE:
        return NestedField.of(fieldId, nullable, columnName, DateType.get());
      case TIMESTAMP_LTZ: {
        if (!columnDataType.hasTimestampInfo()) {
          throw new IllegalArgumentException("Invalid timestamp type: " + columnDataType);
        }
        switch (columnDataType.getTimestampInfo().getTsUnit()) {
          case MICROSECONDS:
            // Iceberg timestamps are microsecond-precision; LTZ maps to withZone().
            return NestedField.of(fieldId, nullable, columnName, TimestampType.withZone());
          case SECONDS:
          case MILLISECONDS:
          case NANOSECONDS:
          default:
            throw new IllegalArgumentException("Unsupported timestamp type: " + columnDataType);
        }
      }
      case ARRAY: {
        if (!columnDataType.hasArrayTypeInfo()) {
          throw new IllegalArgumentException("Invalid array type: " + columnDataType);
        }
        ArrayTypeInfo arrayTypeInfo = columnDataType.getArrayTypeInfo();
        // Element field id/nullability come from the recursively converted element type.
        NestedField childField = convertToIcebergField(arrayTypeInfo.getElementType(), "element");
        ListType listType;
        if (childField.isOptional()) {
          listType = ListType.ofOptional(childField.fieldId(), childField.type());
        } else {
          listType = ListType.ofRequired(childField.fieldId(), childField.type());
        }
        return NestedField.of(fieldId, nullable, columnName, listType);
      }
      case MAP: {
        if (!columnDataType.hasMapTypeInfo()) {
          throw new IllegalArgumentException("Invalid map type: " + columnDataType);
        }
        MapTypeInfo mapTypeInfo = columnDataType.getMapTypeInfo();
        NestedField keyField = convertToIcebergField(mapTypeInfo.getKeyType(), "key");
        NestedField valueField = convertToIcebergField(mapTypeInfo.getValueType(), "value");
        // Optionality of the map is driven by the VALUE field; Iceberg map keys are
        // always required regardless of the key field's nullability.
        MapType mapType;
        if (valueField.isOptional()) {
          mapType = MapType.ofOptional(keyField.fieldId(), valueField.fieldId(), keyField.type(), valueField.type());
        } else {
          mapType = MapType.ofRequired(keyField.fieldId(), valueField.fieldId(), keyField.type(), valueField.type());
        }
        return NestedField.of(fieldId, nullable, columnName, mapType);
      }
      case STRUCT: {
        if (!columnDataType.hasStructTypeInfo()) {
          throw new IllegalArgumentException("Invalid struct type: " + columnDataType);
        }
        StructTypeInfo structTypeInfo = columnDataType.getStructTypeInfo();
        List<NestedField> childFields = new ArrayList<>(structTypeInfo.getFieldsCount());
        for (int i = 0; i < structTypeInfo.getFieldsCount(); ++i) {
          StructTypeInfo.Field subField = structTypeInfo.getFields(i);
          childFields.add(convertToIcebergField(subField.getType(), subField.getName()));
        }
        return NestedField.of(fieldId, nullable, columnName, StructType.of(childFields));
      }
      case INTERVAL_YEAR_MONTH:
      case INTERVAL_DAY_TIME:
      case BITMAP:
      case NONE:
      case FUNCTION_TYPE:
      case VOID:
      default:
        throw new IllegalArgumentException("Unsupported type: " + columnDataType);
    }
  }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy