All downloads are free. Search and download functionality uses the official Maven repository.

com.aliyun.datahub.client.impl.batch.arrow.ArrowUtils Maven / Gradle / Ivy

The newest version!
package com.aliyun.datahub.client.impl.batch.arrow;

import com.aliyun.datahub.client.exception.DatahubClientException;
import com.aliyun.datahub.client.impl.batch.BatchConstants;
import com.aliyun.datahub.client.model.Field;
import com.aliyun.datahub.client.model.FieldType;
import com.aliyun.datahub.client.model.RecordSchema;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Schema;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Static helpers that translate DataHub record schemas into Apache Arrow schemas
 * and expose the Arrow buffer allocator shared by callers of this class.
 *
 * <p>Every schema produced here ends with an extra attribute column named
 * {@link BatchConstants#ATTRIBUTE_COLUMN_NAME}, appended after the data column(s).
 */
public class ArrowUtils {
    /** Single allocator instance handed out by {@link #getBufferAllocator()}; created with a limit of {@code Integer.MAX_VALUE}. */
    private static final BufferAllocator BUFFER_ALLOCATOR = new RootAllocator(Integer.MAX_VALUE);

    /**
     * Returns the shared Arrow buffer allocator.
     *
     * @return the same {@link BufferAllocator} instance on every call
     */
    public static BufferAllocator getBufferAllocator() {
        return BUFFER_ALLOCATOR;
    }

    /**
     * Builds the Arrow schema for a DataHub record schema.
     *
     * @param recordSchema the DataHub schema to convert; {@code null} selects the blob layout
     * @return a tuple schema derived from {@code recordSchema}, or the blob schema when it is {@code null}
     * @throws DatahubClientException if a field has a type with no Arrow mapping
     */
    public static Schema genArrowSchema(RecordSchema recordSchema) {
        return recordSchema != null ? genTupleSchema(recordSchema) : genBlobSchema();
    }

    /**
     * Converts each DataHub field, in declaration order, to an Arrow field and appends the attribute column.
     *
     * @param recordSchema the DataHub schema whose fields are converted; must not be {@code null}
     * @return the resulting Arrow schema
     */
    private static Schema genTupleSchema(RecordSchema recordSchema) {
        List<org.apache.arrow.vector.types.pojo.Field> fieldList = new ArrayList<>();

        for (Field dhField : recordSchema.getFields()) {
            fieldList.add(getDataField(dhField.getName(), dhField.getType(), dhField.isAllowNull()));
        }

        // The attribute column always comes last, after all data columns.
        fieldList.add(getAttributeField());
        return new Schema(fieldList);
    }

    /**
     * Builds the schema used when no record schema is supplied: one non-nullable binary
     * column named {@link BatchConstants#BLOB_COLUMN_NAME} followed by the attribute column.
     *
     * @return the blob Arrow schema
     */
    private static Schema genBlobSchema() {
        List<org.apache.arrow.vector.types.pojo.Field> fieldList = new ArrayList<>();
        fieldList.add(getDataField(BatchConstants.BLOB_COLUMN_NAME, new ArrowType.Binary(), false));
        fieldList.add(getAttributeField());
        return new Schema(fieldList);
    }

    /**
     * Creates a childless Arrow field from a DataHub field type.
     *
     * @param name      the Arrow field name
     * @param type      the DataHub type, mapped through {@link #getArrowType(FieldType)}
     * @param allowNull whether the Arrow field is nullable
     * @return the Arrow field
     * @throws DatahubClientException if {@code type} has no Arrow mapping
     */
    private static org.apache.arrow.vector.types.pojo.Field getDataField(String name, FieldType type, boolean allowNull) {
        return getDataField(name, getArrowType(type), allowNull);
    }

    /**
     * Creates a childless Arrow field of the given Arrow type, with no dictionary encoding.
     *
     * @param name      the Arrow field name
     * @param type      the Arrow type of the field
     * @param allowNull whether the Arrow field is nullable
     * @return the Arrow field
     */
    private static org.apache.arrow.vector.types.pojo.Field getDataField(String name, ArrowType type, boolean allowNull) {
        return new org.apache.arrow.vector.types.pojo.Field(
                name,
                new org.apache.arrow.vector.types.pojo.FieldType(allowNull, type, null),
                null);
    }

    /**
     * Creates the attribute column: a nullable Arrow map field whose single child, named
     * {@code "element"}, is a non-nullable struct holding non-nullable string {@code "key"}
     * and {@code "value"} fields.
     *
     * @return the attribute Arrow field, named {@link BatchConstants#ATTRIBUTE_COLUMN_NAME}
     */
    private static org.apache.arrow.vector.types.pojo.Field getAttributeField() {
        List<org.apache.arrow.vector.types.pojo.Field> children = Collections.singletonList(
                new org.apache.arrow.vector.types.pojo.Field("element",
                        new org.apache.arrow.vector.types.pojo.FieldType(false, new ArrowType.Struct(), null, null),
                        Arrays.asList(
                                getDataField("key", FieldType.STRING, false),
                                getDataField("value", FieldType.STRING, false)
                        )
                )
        );

        return new org.apache.arrow.vector.types.pojo.Field(
                BatchConstants.ATTRIBUTE_COLUMN_NAME,
                new org.apache.arrow.vector.types.pojo.FieldType(true, new ArrowType.Map(false), null),
                children);
    }

    /**
     * Maps a DataHub field type to its Arrow representation.
     *
     * @param type the DataHub field type
     * @return the corresponding Arrow type
     * @throws DatahubClientException if {@code type} is not one of the handled constants
     */
    private static ArrowType getArrowType(FieldType type) {
        switch (type) {
            case BOOLEAN:
                return new ArrowType.Bool();
            case TINYINT:
                return new ArrowType.Int(8, true);
            case SMALLINT:
                return new ArrowType.Int(16, true);
            case INTEGER:
                return new ArrowType.Int(32, true);
            // TIMESTAMP shares the signed 64-bit integer representation with BIGINT.
            case BIGINT:
            case TIMESTAMP:
                return new ArrowType.Int(64, true);
            case FLOAT:
                return new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
            case DOUBLE:
                return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
            // DECIMAL and JSON are carried as UTF-8 text, the same as STRING.
            case STRING:
            case DECIMAL:
            case JSON:
                return new ArrowType.Utf8();
        }
        // Reached only for enum constants not listed above.
        throw new DatahubClientException("Unknown DataHub type " + type);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy