com.clickzetta.platform.bulkload.SchemaConverter

package com.clickzetta.platform.bulkload;

import cz.proto.ArrayTypeInfo;
import cz.proto.DataType;
import cz.proto.DecimalTypeInfo;
import cz.proto.MapTypeInfo;
import cz.proto.StructTypeInfo;
import cz.proto.ingestion.v2.IngestionV2;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types.BinaryType;
import org.apache.iceberg.types.Types.BooleanType;
import org.apache.iceberg.types.Types.DateType;
import org.apache.iceberg.types.Types.DecimalType;
import org.apache.iceberg.types.Types.DoubleType;
import org.apache.iceberg.types.Types.FloatType;
import org.apache.iceberg.types.Types.IntegerType;
import org.apache.iceberg.types.Types.ListType;
import org.apache.iceberg.types.Types.LongType;
import org.apache.iceberg.types.Types.MapType;
import org.apache.iceberg.types.Types.NestedField;
import org.apache.iceberg.types.Types.StringType;
import org.apache.iceberg.types.Types.StructType;
import org.apache.iceberg.types.Types.TimestampType;

import java.util.ArrayList;
import java.util.List;

/**
 * Utility class that converts a schema from {@link IngestionV2.StreamSchema} to {@link org.apache.iceberg.Schema}.
 */
class SchemaConverter {

  public static Schema convertToIcebergSchema(IngestionV2.StreamSchema tableSchema) {
    List<NestedField> fields = new ArrayList<>();
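    // Convert every data field of the stream schema into an Iceberg NestedField.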
    for (int i = 0; i < tableSchema.getDataFieldsCount(); ++i) {
      DataType columnDataType = tableSchema.getDataFields(i).getType();
      String columnName = tableSchema.getDataFields(i).getName();
      fields.add(convertToIcebergField(columnDataType, columnName));
    }
    return new Schema(fields);
  }

  private static NestedField convertToIcebergField(DataType columnDataType, String columnName) {
    boolean nullable = columnDataType.getNullable();
    int fieldId = columnDataType.getFieldId();
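    // Map the stream type category to the corresponding Iceberg type, carrying over the field id and nullability.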
    switch (columnDataType.getCategory()) {
      case INT8:
      case INT16:
      case INT32:
        return NestedField.of(fieldId, nullable, columnName, IntegerType.get());
      case INT64:
        return NestedField.of(fieldId, nullable, columnName, LongType.get());
      case FLOAT32:
        return NestedField.of(fieldId, nullable, columnName, FloatType.get());
      case FLOAT64:
        return NestedField.of(fieldId, nullable, columnName, DoubleType.get());
      case BOOLEAN:
        return NestedField.of(fieldId, nullable, columnName, BooleanType.get());
      case DECIMAL: {
        if (!columnDataType.hasDecimalTypeInfo()) {
          throw new IllegalArgumentException("Invalid decimal type: " + columnDataType);
        }
        DecimalTypeInfo decimalTypeInfo = columnDataType.getDecimalTypeInfo();
        return NestedField.of(fieldId, nullable, columnName,
          DecimalType.of((int) decimalTypeInfo.getPrecision(), (int) decimalTypeInfo.getScale()));
      }
      case CHAR:
      case VARCHAR:
      case STRING:
      case JSON:
        return NestedField.of(fieldId, nullable, columnName, StringType.get());
      case BINARY:
        return NestedField.of(fieldId, nullable, columnName, BinaryType.get());
      case DATE:
        return NestedField.of(fieldId, nullable, columnName, DateType.get());
      case TIMESTAMP_LTZ: {
        if (!columnDataType.hasTimestampInfo()) {
          throw new IllegalArgumentException("Invalid timestamp type: " + columnDataType);
        }
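        // Iceberg timestamps are microsecond precision, so only MICROSECONDS is accepted here.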
        switch (columnDataType.getTimestampInfo().getTsUnit()) {
          case MICROSECONDS:
            return NestedField.of(fieldId, nullable, columnName, TimestampType.withZone());
          case SECONDS:
          case MILLISECONDS:
          case NANOSECONDS:
          default:
            throw new IllegalArgumentException("Unsupported timestamp type: " + columnDataType);
        }
      }
      case ARRAY: {
        if (!columnDataType.hasArrayTypeInfo()) {
          throw new IllegalArgumentException("Invalid array type: " + columnDataType);
        }
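        // Convert the element type recursively, using "element" as the synthetic field name;
        // the element's nullability decides between an optional and a required list element.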
        ArrayTypeInfo arrayTypeInfo = columnDataType.getArrayTypeInfo();
        NestedField childField = convertToIcebergField(arrayTypeInfo.getElementType(), "element");
        ListType listType;
        if (childField.isOptional()) {
          listType = ListType.ofOptional(childField.fieldId(), childField.type());
        } else {
          listType = ListType.ofRequired(childField.fieldId(), childField.type());
        }
        return NestedField.of(fieldId, nullable, columnName, listType);
      }
      case MAP: {
        if (!columnDataType.hasMapTypeInfo()) {
          throw new IllegalArgumentException("Invalid map type: " + columnDataType);
        }
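        // Key and value types are converted recursively; Iceberg map keys are always required,
        // so only the value's nullability selects ofOptional vs. ofRequired.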
        MapTypeInfo mapTypeInfo = columnDataType.getMapTypeInfo();
        NestedField keyField = convertToIcebergField(mapTypeInfo.getKeyType(), "key");
        NestedField valueField = convertToIcebergField(mapTypeInfo.getValueType(), "value");
        MapType mapType;
        if (valueField.isOptional()) {
          mapType = MapType.ofOptional(keyField.fieldId(), valueField.fieldId(), keyField.type(), valueField.type());
        } else {
          mapType = MapType.ofRequired(keyField.fieldId(), valueField.fieldId(), keyField.type(), valueField.type());
        }
        return NestedField.of(fieldId, nullable, columnName, mapType);
      }
      case STRUCT: {
        if (!columnDataType.hasStructTypeInfo()) {
          throw new IllegalArgumentException("Invalid struct type: " + columnDataType);
        }
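        // Convert each struct member recursively, keeping its declared name.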
        StructTypeInfo structTypeInfo = columnDataType.getStructTypeInfo();
        List<NestedField> childFields = new ArrayList<>();
        for (int i = 0; i < structTypeInfo.getFieldsCount(); ++i) {
          StructTypeInfo.Field subField = structTypeInfo.getFields(i);
          childFields.add(convertToIcebergField(subField.getType(), subField.getName()));
        }
        return NestedField.of(fieldId, nullable, columnName, StructType.of(childFields));
      }
      case INTERVAL_YEAR_MONTH:
      case INTERVAL_DAY_TIME:
      case BITMAP:
      case NONE:
      case FUNCTION_TYPE:
      case VOID:
      default:
        throw new IllegalArgumentException("Unsupported type: " + columnDataType);
    }
  }

}
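
For reference, a minimal usage sketch. SchemaConverterExample and toIceberg are hypothetical names introduced here for illustration; convertToIcebergSchema is package-private, so any caller has to live in com.clickzetta.platform.bulkload, and the IngestionV2.StreamSchema instance is assumed to be supplied by the caller (for example, from a bulk-load stream's metadata):

package com.clickzetta.platform.bulkload;

import cz.proto.ingestion.v2.IngestionV2;
import org.apache.iceberg.Schema;

class SchemaConverterExample {

  // Converts the stream schema carried by a bulk-load stream into an Iceberg schema
  // and prints the resulting field layout.
  static Schema toIceberg(IngestionV2.StreamSchema streamSchema) {
    Schema icebergSchema = SchemaConverter.convertToIcebergSchema(streamSchema);
    System.out.println(icebergSchema);
    return icebergSchema;
  }
}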



