com.clickzetta.platform.bulkload.RecordConverter

package com.clickzetta.platform.bulkload;

import com.clickzetta.platform.catalyst.data.DataGetters;
import com.clickzetta.platform.catalyst.data.InternalArray;
import com.clickzetta.platform.catalyst.data.InternalMap;
import com.clickzetta.platform.client.api.ArrowRow;
import com.clickzetta.platform.client.api.Row;
import com.google.common.base.Preconditions;
import cz.proto.DataType;
import cz.proto.DataTypeCategory;
import cz.proto.StructTypeInfo;
import cz.proto.ingestion.v2.IngestionV2;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.nio.ByteBuffer;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;

/**
 * Utility class to convert row-wise data from {@link com.clickzetta.platform.client.api.Row}
 * to {@link org.apache.iceberg.data.GenericRecord}.
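 *
 * <p>A minimal usage sketch (the {@code icebergSchema}, {@code streamSchema}, and
 * {@code row} variables are illustrative, not part of this class):
 * <pre>{@code
 *   Record record = GenericRecord.create(icebergSchema.asStruct());
 *   RecordConverter.convertToRecord(streamSchema, row, record);
 * }</pre>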
 */
class RecordConverter {

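  /**
   * Wraps the flat stream schema in a single STRUCT {@link DataType} so a top-level row
   * can be converted through the same code path as a nested struct.
   */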
  private static DataType wrapStructType(IngestionV2.StreamSchema schema) {
    StructTypeInfo.Builder structBuilder = StructTypeInfo.newBuilder();
    for (IngestionV2.DataField dataField : schema.getDataFieldsList()) {
      StructTypeInfo.Field field = StructTypeInfo.Field.newBuilder()
        .setName(dataField.getName()).setType(dataField.getType()).build();
      structBuilder.addFields(field);
    }
    return DataType.newBuilder()
      .setCategory(DataTypeCategory.STRUCT)
      .setStructTypeInfo(structBuilder.build())
      .build();
  }

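  /**
   * Converts the value of column {@code i} from its ClickZetta internal representation to
   * the Java object Iceberg's generic data model expects for {@code outputType}.
   */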
  private static Object convertObject(DataType inputType, DataGetters dataGetters, int i, Type outputType) {
    switch (inputType.getCategory()) {
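      // Iceberg has no 8- or 16-bit integer types, so narrow integers widen to int.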
      case INT8:
        return (int) dataGetters.getByte(i);
      case INT16:
        return (int) dataGetters.getShort(i);
      case INT32:
        return dataGetters.getInt(i);
      case INT64:
        return dataGetters.getLong(i);
      case FLOAT32:
        return dataGetters.getFloat(i);
      case FLOAT64:
        return dataGetters.getDouble(i);
      case DECIMAL: {
        int precision = (int) inputType.getDecimalTypeInfo().getPrecision();
        int scale = (int) inputType.getDecimalTypeInfo().getScale();
        BigDecimal newValue = dataGetters.getDecimal(i, precision, scale);
        // Iceberg expects the declared scale exactly: rescale when needed and fail on overflow.
        if (newValue.precision() != precision || newValue.scale() != scale) {
          newValue = newValue.setScale(scale, RoundingMode.HALF_UP);
          if (newValue.precision() > precision) {
            throw new RuntimeException("decimal value overflow");
          }
        }
        return newValue;
      }
      case BOOLEAN:
        return dataGetters.getBoolean(i);
      case CHAR:
      case VARCHAR:
      case STRING:
      case JSON:
        return dataGetters.getString(i);
      case BINARY:
        return ByteBuffer.wrap(dataGetters.getBinary(i));
      case DATE:
        return LocalDate.ofEpochDay(dataGetters.getDate(i));
      case TIMESTAMP_LTZ:
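        // Iceberg's generic model uses OffsetDateTime for timestamptz; render the
        // instant in the JVM default zone.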
        return OffsetDateTime.ofInstant(
          dataGetters.getTimestamp(i).toSQLTimestamp().toInstant(), TimeZone.getDefault().toZoneId());
      case ARRAY: {
        return convertArray(inputType.getArrayTypeInfo().getElementType(),
          dataGetters.getArray(i), outputType.asListType().elementType());
      }
      case MAP: {
        InternalMap internalMap = dataGetters.getMap(i);
        List keyArray = convertArray(inputType.getMapTypeInfo().getKeyType(),
          internalMap.keyArray(), outputType.asMapType().keyType());
        List valueArray = convertArray(inputType.getMapTypeInfo().getValueType(),
          internalMap.valueArray(), outputType.asMapType().valueType());
        Map map = new LinkedHashMap<>();
        for (int j = 0; j < keyArray.size(); j++) {
          map.put(keyArray.get(j), valueArray.get(j));
        }
        return map;
      }
      case STRUCT: {
        int numFields = inputType.getStructTypeInfo().getFieldsCount();
        Record record = GenericRecord.create(outputType.asStructType());
        convertRecord(inputType, dataGetters.getRow(i, numFields), record);
        return record;
      }
      default:
        throw new IllegalArgumentException("Unsupported data type: " + inputType.getCategory());
    }
  }

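  /**
   * Converts each element of {@code internalArray} to its Iceberg representation,
   * preserving nulls when the element type is nullable.
   */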
  private static List<Object> convertArray(DataType inputElemType, InternalArray internalArray, Type outputElemType) {
    List<Object> array = new ArrayList<>();

    for (int i = 0; i < internalArray.size(); i++) {
      if (inputElemType.getNullable() && internalArray.isNullAt(i)) {
        array.add(null);
      } else {
        array.add(convertObject(inputElemType, internalArray, i, outputElemType));
      }
    }

    return array;
  }

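  /**
   * Converts all fields of a struct-typed row into the target {@link Record},
   * matching input and output fields by position.
   */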
  private static void convertRecord(DataType structType, DataGetters dataGetters, Record record) {
    StructTypeInfo inputStructType = structType.getStructTypeInfo();
    Types.StructType outputStructType = record.struct();

    Preconditions.checkArgument(inputStructType.getFieldsCount() == outputStructType.fields().size(),
      "Column count mismatch between input row and output record");

    for (int i = 0; i < inputStructType.getFieldsCount(); i++) {
      DataType inputType = inputStructType.getFields(i).getType();
      Type outputType = outputStructType.fields().get(i).type();
      if (inputType.getNullable() && dataGetters.isNullAt(i)) {
        record.set(i, null);
      } else {
        record.set(i, convertObject(inputType, dataGetters, i, outputType));
      }
    }
  }

  /**
   * Convert Row to Record.
   *
   * <p>It assumes that the Row and the Record follow the input schema. The column type
   * mapping strictly follows the mapping implemented in the SchemaConverter class.
   * Therefore, it simply gets each column by its index and does not check for type
   * mismatches aggressively.
   */
  public static void convertToRecord(IngestionV2.StreamSchema schema, Row row, Record record) {
    convertRecord(wrapStructType(schema), (ArrowRow) row, record);
  }
}