All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.trino.hive.formats.line.openxjson.OpenXJsonSerializer Maven / Gradle / Ivy

There is a newer version: 458
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.hive.formats.line.openxjson;

import com.google.common.collect.ImmutableList;
import io.airlift.slice.SliceOutput;
import io.trino.hive.formats.HiveFormatUtils;
import io.trino.hive.formats.line.Column;
import io.trino.hive.formats.line.LineSerializer;
import io.trino.spi.Page;
import io.trino.spi.block.Block;
import io.trino.spi.block.SqlMap;
import io.trino.spi.block.SqlRow;
import io.trino.spi.type.ArrayType;
import io.trino.spi.type.CharType;
import io.trino.spi.type.Chars;
import io.trino.spi.type.DecimalType;
import io.trino.spi.type.MapType;
import io.trino.spi.type.RowType;
import io.trino.spi.type.RowType.Field;
import io.trino.spi.type.SqlDecimal;
import io.trino.spi.type.SqlTimestamp;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.Type;
import io.trino.spi.type.VarcharType;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.SignStyle;
import java.util.ArrayList;
import java.util.Base64;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.hive.formats.line.openxjson.JsonWriter.writeJsonArray;
import static io.trino.hive.formats.line.openxjson.JsonWriter.writeJsonObject;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.BooleanType.BOOLEAN;
import static io.trino.spi.type.DateType.DATE;
import static io.trino.spi.type.DoubleType.DOUBLE;
import static io.trino.spi.type.IntegerType.INTEGER;
import static io.trino.spi.type.RealType.REAL;
import static io.trino.spi.type.SmallintType.SMALLINT;
import static io.trino.spi.type.TinyintType.TINYINT;
import static io.trino.spi.type.VarbinaryType.VARBINARY;
import static java.time.temporal.ChronoField.DAY_OF_MONTH;
import static java.time.temporal.ChronoField.HOUR_OF_DAY;
import static java.time.temporal.ChronoField.MINUTE_OF_HOUR;
import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
import static java.time.temporal.ChronoField.NANO_OF_SECOND;
import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
import static java.time.temporal.ChronoField.YEAR;
import static java.util.Objects.requireNonNull;

/**
 * {@link LineSerializer} that renders one row of a {@link Page} as a single JSON object.
 * <p>
 * Each configured {@link Column} becomes one field of the object, in column order.
 * Scalar values are converted to boxed Java values or strings, arrays become lists,
 * and maps and rows become insertion-ordered maps; the resulting tree is then rendered
 * with {@code JsonWriter.writeJsonObject} and written as UTF-8 bytes.
 */
public class OpenXJsonSerializer
        implements LineSerializer
{
    // Prints a LocalDateTime as yyyy-MM-ddTHH:mm:ss[.fraction] followed by a literal 'Z'.
    // The fraction is emitted with 0 to 9 digits, so trailing zeros are not printed.
    private static final DateTimeFormatter UTC_PRINT_FORMATTER = new DateTimeFormatterBuilder()
            .appendValue(YEAR, 1, 10, SignStyle.NORMAL)
            .appendLiteral('-')
            .appendValue(MONTH_OF_YEAR, 2, 2, SignStyle.NORMAL)
            .appendLiteral('-')
            .appendValue(DAY_OF_MONTH, 2, 2, SignStyle.NORMAL)
            .appendLiteral('T')
            .appendValue(HOUR_OF_DAY, 2, 2, SignStyle.NORMAL)
            .appendLiteral(':')
            .appendValue(MINUTE_OF_HOUR, 2, 2, SignStyle.NORMAL)
            .appendLiteral(':')
            .appendValue(SECOND_OF_MINUTE, 2, 2, SignStyle.NORMAL)
            .optionalStart()
            .appendFraction(NANO_OF_SECOND, 0, 9, true)
            .optionalEnd()
            .appendLiteral('Z')
            .toFormatter();

    private final List<Column> columns;
    private final OpenXJsonOptions options;

    /**
     * Creates a serializer for the given columns.
     *
     * @param columns columns to write, in the order their blocks appear in the page
     * @param options serializer options; only {@code isExplicitNull()} is consulted here
     * @throws NullPointerException if {@code columns} or {@code options} is null
     * @throws IllegalArgumentException if any column type is rejected by {@link #isSupportedType(Type)}
     */
    public OpenXJsonSerializer(List<Column> columns, OpenXJsonOptions options)
    {
        this.columns = ImmutableList.copyOf(requireNonNull(columns, "columns is null"));
        this.options = requireNonNull(options, "options is null");
        // Validate the immutable copy, not the caller's list, so that what is checked is what is stored
        for (Column column : this.columns) {
            if (!isSupportedType(column.type())) {
                throw new IllegalArgumentException("Unsupported column type: " + column);
            }
        }
    }

    /**
     * Returns the types of the configured columns, in column order.
     */
    @Override
    public List<Type> getTypes()
    {
        return columns.stream().map(Column::type).collect(toImmutableList());
    }

    /**
     * Writes the row at {@code position} of {@code page} to {@code sliceOutput} as one JSON object.
     * <p>
     * Block {@code i} of the page is paired with column {@code i}. A null field is written
     * only when {@code options.isExplicitNull()} is true; otherwise it is omitted.
     *
     * @throws IOException if the JSON text cannot be produced or written
     */
    @Override
    public void write(Page page, int position, SliceOutput sliceOutput)
            throws IOException
    {
        // LinkedHashMap keeps the fields in column order
        Map<String, Object> jsonObject = new LinkedHashMap<>();

        for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
            Column column = columns.get(columnIndex);
            String fieldName = column.name();

            Block block = page.getBlock(columnIndex);
            Object fieldValue = writeValue(column.type(), block, position);
            if (options.isExplicitNull() || fieldValue != null) {
                jsonObject.put(fieldName, fieldValue);
            }
        }
        sliceOutput.write(writeJsonObject(jsonObject).getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Converts the value at {@code position} of {@code block} into a tree of plain Java objects.
     *
     * @return {@code null} for a null position; otherwise a boxed primitive, a {@link String},
     *         a {@link List} (array) or a {@link Map} (map or row)
     * @throws UnsupportedOperationException if {@code type} is not handled
     */
    private Object writeValue(Type type, Block block, int position)
            throws InvalidJsonException
    {
        if (block.isNull(position)) {
            return null;
        }

        if (BOOLEAN.equals(type)) {
            return BOOLEAN.getBoolean(block, position);
        }
        else if (BIGINT.equals(type)) {
            return BIGINT.getLong(block, position);
        }
        else if (INTEGER.equals(type)) {
            return INTEGER.getInt(block, position);
        }
        else if (SMALLINT.equals(type)) {
            return SMALLINT.getShort(block, position);
        }
        else if (TINYINT.equals(type)) {
            return TINYINT.getByte(block, position);
        }
        else if (type instanceof DecimalType) {
            // decimal type is read-only in Hive, but we support it
            SqlDecimal value = (SqlDecimal) type.getObjectValue(null, block, position);
            return value.toBigDecimal().toString();
        }
        else if (REAL.equals(type)) {
            return REAL.getFloat(block, position);
        }
        else if (DOUBLE.equals(type)) {
            return DOUBLE.getDouble(block, position);
        }
        else if (DATE.equals(type)) {
            // date type is read-only in Hive, but we support it
            return HiveFormatUtils.formatHiveDate(block, position);
        }
        else if (type instanceof TimestampType) {
            SqlTimestamp objectValue = (SqlTimestamp) type.getObjectValue(null, block, position);
            LocalDateTime localDateTime = objectValue.toLocalDateTime();
            return UTC_PRINT_FORMATTER.format(localDateTime);
        }
        else if (VARBINARY.equals(type)) {
            // varbinary type is read-only in Hive, but we support it
            return Base64.getEncoder().encodeToString(VARBINARY.getSlice(block, position).getBytes());
        }
        else if (type instanceof VarcharType) {
            return type.getSlice(block, position).toStringUtf8();
        }
        else if (type instanceof CharType charType) {
            // char type is read-only in Hive, but we support it
            return Chars.padSpaces(charType.getSlice(block, position), charType).toStringUtf8();
        }
        else if (type instanceof ArrayType arrayType) {
            return writeArray(arrayType, block, position);
        }
        else if (type instanceof MapType mapType) {
            return writeMap(mapType, block, position);
        }
        else if (type instanceof RowType rowType) {
            return writeRow(rowType, block, position);
        }
        else {
            throw new UnsupportedOperationException("Unsupported column type: " + type);
        }
    }

    /**
     * Converts an array value into a list; null elements are kept as {@code null} entries.
     */
    private List<Object> writeArray(ArrayType arrayType, Block block, int position)
            throws InvalidJsonException
    {
        Type elementType = arrayType.getElementType();
        Block arrayBlock = arrayType.getObject(block, position);

        List<Object> jsonArray = new ArrayList<>();
        for (int arrayIndex = 0; arrayIndex < arrayBlock.getPositionCount(); arrayIndex++) {
            jsonArray.add(writeValue(elementType, arrayBlock, arrayIndex));
        }
        return jsonArray;
    }

    /**
     * Converts a map value into an insertion-ordered map keyed by the string form of each key.
     *
     * @throws RuntimeException if the key type is structural or a key is null
     */
    private Map<String, Object> writeMap(MapType mapType, Block block, int position)
            throws InvalidJsonException
    {
        Type keyType = mapType.getKeyType();
        if (isStructuralType(keyType)) {
            throw new RuntimeException("Unsupported map key type: " + keyType);
        }
        Type valueType = mapType.getValueType();
        SqlMap sqlMap = mapType.getObject(block, position);

        int rawOffset = sqlMap.getRawOffset();
        Block rawKeyBlock = sqlMap.getRawKeyBlock();
        Block rawValueBlock = sqlMap.getRawValueBlock();

        Map<String, Object> jsonMap = new LinkedHashMap<>();
        for (int mapIndex = 0; mapIndex < sqlMap.getSize(); mapIndex++) {
            try {
                Object key = writeValue(keyType, rawKeyBlock, rawOffset + mapIndex);
                if (key == null) {
                    throw new RuntimeException("OpenX JsonSerDe can not write a null map key");
                }
                // Structural key types are rejected above, so the Map and List branches are defensive
                String fieldName;
                if (key instanceof Map<?, ?> jsonObject) {
                    fieldName = writeJsonObject(jsonObject);
                }
                else if (key instanceof List<?> list) {
                    fieldName = writeJsonArray(list);
                }
                else {
                    fieldName = key.toString();
                }

                Object value = writeValue(valueType, rawValueBlock, rawOffset + mapIndex);
                jsonMap.put(fieldName, value);
            }
            catch (InvalidJsonException _) {
                // Deliberately best-effort: an entry that cannot be rendered as JSON
                // is skipped instead of failing the whole row
            }
        }
        return jsonMap;
    }

    /**
     * Converts a row value into an insertion-ordered map of field name to field value.
     * <p>
     * A null field is included only when {@code options.isExplicitNull()} is true.
     */
    private Map<String, Object> writeRow(RowType rowType, Block block, int position)
            throws InvalidJsonException
    {
        List<Field> fields = rowType.getFields();
        SqlRow sqlRow = rowType.getObject(block, position);
        int rawIndex = sqlRow.getRawIndex();

        Map<String, Object> jsonObject = new LinkedHashMap<>();
        for (int fieldIndex = 0; fieldIndex < fields.size(); fieldIndex++) {
            Field field = fields.get(fieldIndex);
            Block fieldBlock = sqlRow.getRawFieldBlock(fieldIndex);
            // Throws NoSuchElementException for a row field that has no name
            String fieldName = field.getName().orElseThrow();
            Object fieldValue = writeValue(field.getType(), fieldBlock, rawIndex);
            if (options.isExplicitNull() || fieldValue != null) {
                jsonObject.put(fieldName, fieldValue);
            }
        }
        return jsonObject;
    }

    /**
     * Tells whether this serializer can write values of the given type.
     * <p>
     * Arrays, maps and rows are supported when all of their nested types are supported;
     * map keys must additionally be non-structural.
     */
    public static boolean isSupportedType(Type type)
    {
        if (type instanceof ArrayType arrayType) {
            return isSupportedType(arrayType.getElementType());
        }
        if (type instanceof MapType mapType) {
            return !isStructuralType(mapType.getKeyType()) &&
                   isSupportedType(mapType.getKeyType()) &&
                   isSupportedType(mapType.getValueType());
        }
        if (type instanceof RowType rowType) {
            return rowType.getFields().stream()
                    .map(Field::getType)
                    .allMatch(OpenXJsonSerializer::isSupportedType);
        }

        return BOOLEAN.equals(type) ||
               BIGINT.equals(type) ||
               INTEGER.equals(type) ||
               SMALLINT.equals(type) ||
               TINYINT.equals(type) ||
               type instanceof DecimalType ||
               REAL.equals(type) ||
               DOUBLE.equals(type) ||
               DATE.equals(type) ||
               type instanceof TimestampType ||
               VARBINARY.equals(type) ||
               type instanceof VarcharType ||
               type instanceof CharType;
    }

    /**
     * Returns true for map, array and row types.
     */
    private static boolean isStructuralType(Type type)
    {
        return type instanceof MapType || type instanceof ArrayType || type instanceof RowType;
    }
}