All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.aliyun.odps.table.utils.SchemaUtils Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.table.utils;

import com.aliyun.odps.Column;
import com.aliyun.odps.table.configuration.ArrowOptions;
import com.aliyun.odps.type.*;
import com.google.gson.JsonObject;
import org.apache.arrow.vector.types.DateUnit;
import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.TimeUnit;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;

public class SchemaUtils {

    public static final int DECIMAL_DEFAULT_PRECISION = 54;
    public static final int DECIMAL_DEFAULT_SCALE = 18;

    public static final int ARROW_DECIMAL_DEFAULT_PRECISION = 38;
    public static final int ARROW_DECIMAL_DEFAULT_SCALE = 18;

    public static Column parseColumn(JsonObject column) {
        String name = column.has("Name") ? column.get("Name").getAsString() : null;
        String type = column.has("Type") ? column.get("Type").getAsString() : null;
        String comment = column.has("Comment") ? column.get("Comment").getAsString() : null;
        TypeInfo typeInfo = TypeInfoParser.getTypeInfoFromTypeString(type);
        Column col = new Column(name, typeInfo, comment);
        if (column.has("Nullable")) {
            col.setNullable(column.get("Nullable").getAsBoolean());
        }
        return col;
    }

    public static Schema toArrowSchema(List columns) {
        return toArrowSchema(columns, ArrowOptions.createDefault());
    }

    public static Field columnToArrowField(Column fieldColumn) {
        return columnToArrowField(fieldColumn, ArrowOptions.createDefault());
    }

    public static Schema toArrowSchema(List columns,
                                       ArrowOptions arrowOptions) {
        Collection fields =
                columns.stream().map(col -> columnToArrowField(col, arrowOptions))
                        .collect(Collectors.toCollection(ArrayList::new));
        return new Schema(fields);
    }

    public static Field columnToArrowField(Column fieldColumn,
                                           ArrowOptions options) {
        String fieldName = fieldColumn.getName();
        TypeInfo typeInfo = fieldColumn.getTypeInfo();
        return convertTypeInfoToArrowField(fieldName, typeInfo, fieldColumn.isNullable(), options);
    }

    private static Field convertTypeInfoToArrowField(String fieldName,
                                                     TypeInfo typeInfo,
                                                     boolean nullable,
                                                     ArrowOptions options) {
        ArrowType arrowType = getArrowType(typeInfo, options);
        return new Field(fieldName, new FieldType(nullable, arrowType, null, null),
                generateSubFields(typeInfo, options));
    }

    private static List generateSubFields(TypeInfo typeInfo,
                                                 ArrowOptions options) {
        if (typeInfo instanceof ArrayTypeInfo) {
            ArrayTypeInfo arrayTypeInfo = (ArrayTypeInfo) typeInfo;
            TypeInfo subti = arrayTypeInfo.getElementTypeInfo();
            return Arrays.asList(convertTypeInfoToArrowField("element", subti, true, options));
        } else if (typeInfo instanceof MapTypeInfo) {
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            TypeInfo keyti = mapTypeInfo.getKeyTypeInfo(), valti = mapTypeInfo.getValueTypeInfo();
            return Arrays.asList(
                    new Field("element", new FieldType(false, new ArrowType.Struct(), null, null),
                            Arrays.asList(
                                    convertTypeInfoToArrowField("key", keyti, false, options),
                                    convertTypeInfoToArrowField("value", valti, true, options)
                            )
                    )
            );
        } else if (typeInfo instanceof StructTypeInfo) {
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            ArrayList sfields = new ArrayList<>();
            List subTypeInfos = structTypeInfo.getFieldTypeInfos();
            List subNames = structTypeInfo.getFieldNames();
            for (int i = 0; i < structTypeInfo.getFieldCount(); i++) {
                sfields.add(convertTypeInfoToArrowField(subNames.get(i), subTypeInfos.get(i), true, options));
            }
            return sfields;
        } else {
            return null;
        }
    }

    private static ArrowType getArrowType(TypeInfo typeInfo,
                                          ArrowOptions options) {
        ArrowType arrowType = null;
        switch (typeInfo.getOdpsType()) {
            case CHAR:
            case VARCHAR:
            case JSON:
            case STRING:
                arrowType = new ArrowType.Utf8();
                break;
            case BINARY:
                arrowType = new ArrowType.Binary();
                break;
            case TINYINT:
                arrowType = new ArrowType.Int(8, true);
                break;
            case SMALLINT:
                arrowType = new ArrowType.Int(16, true);
                break;
            case INT:
                arrowType = new ArrowType.Int(32, true);
                break;
            case BIGINT:
                arrowType = new ArrowType.Int(64, true);
                break;
            case BOOLEAN:
                arrowType = new ArrowType.Bool();
                break;
            case FLOAT:
                arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
                break;
            case DOUBLE:
                arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
                break;
            case DECIMAL:
                // set decimal(54,18) to decimal(38,18)
                if (((DecimalTypeInfo) typeInfo).getPrecision() == DECIMAL_DEFAULT_PRECISION &&
                        ((DecimalTypeInfo) typeInfo).getScale() == DECIMAL_DEFAULT_SCALE) {
                    arrowType = new ArrowType.Decimal(ARROW_DECIMAL_DEFAULT_PRECISION, ARROW_DECIMAL_DEFAULT_SCALE);
                } else {
                    arrowType = new ArrowType.Decimal(((DecimalTypeInfo) typeInfo).getPrecision(), ((DecimalTypeInfo) typeInfo).getScale());
                }
                break;
            case DATE:
                arrowType = new ArrowType.Date(DateUnit.DAY);
                break;
            case DATETIME:
                arrowType = parseTimeStamp(options.getDateTimeUnit());
                break;
            case TIMESTAMP:
            case TIMESTAMP_NTZ:
                arrowType = parseTimeStamp(options.getTimestampUnit());
                break;
            case ARRAY:
                arrowType = new ArrowType.List();
                break;
            case STRUCT:
                arrowType = new ArrowType.Struct();
                break;
            case MAP:
                arrowType = new ArrowType.Map(false);
                break;
            default:
                throw new UnsupportedOperationException("Unsupported type: " + typeInfo.getOdpsType());
        }
        return arrowType;
    }

    private static ArrowType.Timestamp parseTimeStamp(ArrowOptions.TimestampUnit unit) {
        switch (unit) {
            case SECOND:
                return new ArrowType.Timestamp(TimeUnit.SECOND, null);
            case MILLI:
                return new ArrowType.Timestamp(TimeUnit.MILLISECOND, null);
            case NANO:
                return new ArrowType.Timestamp(TimeUnit.NANOSECOND, null);
            case MICRO:
                return new ArrowType.Timestamp(TimeUnit.MICROSECOND, null);
            default:
                throw new UnsupportedOperationException("Unsupported type: " + unit);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy