All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointWriter Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.deltalake.transactionlog.checkpoint;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import io.trino.filesystem.TrinoOutputFile;
import io.trino.parquet.writer.ParquetSchemaConverter;
import io.trino.parquet.writer.ParquetWriter;
import io.trino.parquet.writer.ParquetWriterOptions;
import io.trino.plugin.deltalake.DeltaLakeColumnHandle;
import io.trino.plugin.deltalake.DeltaLakeColumnMetadata;
import io.trino.plugin.deltalake.transactionlog.AddFileEntry;
import io.trino.plugin.deltalake.transactionlog.MetadataEntry;
import io.trino.plugin.deltalake.transactionlog.ProtocolEntry;
import io.trino.plugin.deltalake.transactionlog.RemoveFileEntry;
import io.trino.plugin.deltalake.transactionlog.TransactionEntry;
import io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics;
import io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeJsonFileStatistics;
import io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeParquetFileStatistics;
import io.trino.spi.PageBuilder;
import io.trino.spi.block.ArrayBlockBuilder;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.block.MapBlockBuilder;
import io.trino.spi.block.RowBlockBuilder;
import io.trino.spi.type.ArrayType;
import io.trino.spi.type.DateTimeEncoding;
import io.trino.spi.type.MapType;
import io.trino.spi.type.RowType;
import io.trino.spi.type.RowType.Field;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.Type;
import io.trino.spi.type.TypeManager;
import jakarta.annotation.Nullable;
import org.apache.parquet.format.CompressionCodec;
import org.joda.time.DateTimeZone;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Predicates.alwaysTrue;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeParquetStatisticsUtils.jsonValueToTrinoValue;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeParquetStatisticsUtils.toJsonValues;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeParquetStatisticsUtils.toNullCounts;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractPartitionColumns;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeStatsAsJson;
import static io.trino.plugin.deltalake.transactionlog.MetadataEntry.DELTA_CHECKPOINT_WRITE_STATS_AS_JSON_PROPERTY;
import static io.trino.plugin.deltalake.transactionlog.MetadataEntry.DELTA_CHECKPOINT_WRITE_STATS_AS_STRUCT_PROPERTY;
import static io.trino.plugin.deltalake.transactionlog.TransactionLogParser.deserializePartitionValue;
import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND;
import static io.trino.spi.type.TypeUtils.writeNativeValue;
import static java.lang.Math.multiplyExact;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toMap;

public class CheckpointWriter
{
    private static final int METADATA_BLOCK_CHANNEL = 0;
    private static final int PROTOCOL_BLOCK_CHANNEL = 1;
    private static final int TXN_BLOCK_CHANNEL = 2;
    private static final int ADD_BLOCK_CHANNEL = 3;
    private static final int REMOVE_BLOCK_CHANNEL = 4;

    // must match channel list above
    private static final int CHANNELS_COUNT = 5;

    private final TypeManager typeManager;
    private final CheckpointSchemaManager checkpointSchemaManager;
    private final String trinoVersion;
    private final ParquetWriterOptions parquetWriterOptions;

    public CheckpointWriter(TypeManager typeManager, CheckpointSchemaManager checkpointSchemaManager, String trinoVersion)
    {
        this(typeManager, checkpointSchemaManager, trinoVersion, ParquetWriterOptions.builder().build());
    }

    @VisibleForTesting
    public CheckpointWriter(TypeManager typeManager, CheckpointSchemaManager checkpointSchemaManager, String trinoVersion, ParquetWriterOptions parquetWriterOptions)
    {
        this.typeManager = requireNonNull(typeManager, "typeManager is null");
        this.checkpointSchemaManager = requireNonNull(checkpointSchemaManager, "checkpointSchemaManager is null");
        this.trinoVersion = requireNonNull(trinoVersion, "trinoVersion is null");
        this.parquetWriterOptions = requireNonNull(parquetWriterOptions, "parquetWriterOptions is null");
    }

    public void write(CheckpointEntries entries, TrinoOutputFile outputFile)
            throws IOException
    {
        Map configuration = entries.getMetadataEntry().getConfiguration();
        boolean writeStatsAsJson = Boolean.parseBoolean(configuration.getOrDefault(DELTA_CHECKPOINT_WRITE_STATS_AS_JSON_PROPERTY, "true"));
        // The default value is false in https://github.com/delta-io/delta/blob/master/PROTOCOL.md#checkpoint-format, but Databricks defaults to true
        boolean writeStatsAsStruct = Boolean.parseBoolean(configuration.getOrDefault(DELTA_CHECKPOINT_WRITE_STATS_AS_STRUCT_PROPERTY, "true"));

        ProtocolEntry protocolEntry = entries.getProtocolEntry();

        RowType metadataEntryType = checkpointSchemaManager.getMetadataEntryType();
        RowType protocolEntryType = checkpointSchemaManager.getProtocolEntryType(protocolEntry.getReaderFeatures().isPresent(), protocolEntry.getWriterFeatures().isPresent());
        RowType txnEntryType = checkpointSchemaManager.getTxnEntryType();
        RowType addEntryType = checkpointSchemaManager.getAddEntryType(
                entries.getMetadataEntry(),
                entries.getProtocolEntry(),
                alwaysTrue(),
                writeStatsAsJson,
                writeStatsAsStruct,
                true);
        RowType removeEntryType = checkpointSchemaManager.getRemoveEntryType();

        List columnNames = ImmutableList.of(
                "metaData",
                "protocol",
                "txn",
                "add",
                "remove");
        List columnTypes = ImmutableList.of(
                metadataEntryType,
                protocolEntryType,
                txnEntryType,
                addEntryType,
                removeEntryType);

        ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(columnTypes, columnNames, false, false);

        ParquetWriter writer = new ParquetWriter(
                outputFile.create(),
                schemaConverter.getMessageType(),
                schemaConverter.getPrimitiveTypes(),
                parquetWriterOptions,
                CompressionCodec.SNAPPY,
                trinoVersion,
                Optional.of(DateTimeZone.UTC),
                Optional.empty());

        PageBuilder pageBuilder = new PageBuilder(columnTypes);

        writeMetadataEntry(pageBuilder, metadataEntryType, entries.getMetadataEntry());
        writeProtocolEntry(pageBuilder, protocolEntryType, entries.getProtocolEntry());
        for (TransactionEntry transactionEntry : entries.getTransactionEntries()) {
            writeTransactionEntry(pageBuilder, txnEntryType, transactionEntry);
        }
        List partitionColumns = extractPartitionColumns(entries.getMetadataEntry(), entries.getProtocolEntry(), typeManager);
        List partitionValuesParsedFieldTypes = partitionColumns.stream()
                .map(column -> RowType.field(column.getColumnName(), column.getType()))
                .collect(toImmutableList());
        for (AddFileEntry addFileEntry : entries.getAddFileEntries()) {
            writeAddFileEntry(pageBuilder, addEntryType, addFileEntry, entries.getMetadataEntry(), entries.getProtocolEntry(), partitionColumns, partitionValuesParsedFieldTypes, writeStatsAsJson, writeStatsAsStruct);
        }
        for (RemoveFileEntry removeFileEntry : entries.getRemoveFileEntries()) {
            writeRemoveFileEntry(pageBuilder, removeEntryType, removeFileEntry);
        }
        // Not writing commit infos for now. DB does not keep them in the checkpoints by default

        writer.write(pageBuilder.build());
        writer.close();
    }

    private void writeMetadataEntry(PageBuilder pageBuilder, RowType entryType, MetadataEntry metadataEntry)
    {
        pageBuilder.declarePosition();
        ((RowBlockBuilder) pageBuilder.getBlockBuilder(METADATA_BLOCK_CHANNEL)).buildEntry(fieldBuilders -> {
            writeString(fieldBuilders.get(0), entryType, 0, "id", metadataEntry.getId());
            writeString(fieldBuilders.get(1), entryType, 1, "name", metadataEntry.getName());
            writeString(fieldBuilders.get(2), entryType, 2, "description", metadataEntry.getDescription());

            RowType formatType = getInternalRowType(entryType, 3, "format");
            ((RowBlockBuilder) fieldBuilders.get(3)).buildEntry(formatBlockBuilders -> {
                writeString(formatBlockBuilders.get(0), formatType, 0, "provider", metadataEntry.getFormat().getProvider());
                writeStringMap(formatBlockBuilders.get(1), formatType, 1, "options", metadataEntry.getFormat().getOptions());
            });

            writeString(fieldBuilders.get(4), entryType, 4, "schemaString", metadataEntry.getSchemaString());
            writeStringList(fieldBuilders.get(5), entryType, 5, "partitionColumns", metadataEntry.getOriginalPartitionColumns());
            writeStringMap(fieldBuilders.get(6), entryType, 6, "configuration", metadataEntry.getConfiguration());
            writeLong(fieldBuilders.get(7), entryType, 7, "createdTime", metadataEntry.getCreatedTime());
        });

        // null for others
        appendNullOtherBlocks(pageBuilder, METADATA_BLOCK_CHANNEL);
    }

    private void writeProtocolEntry(PageBuilder pageBuilder, RowType entryType, ProtocolEntry protocolEntry)
    {
        pageBuilder.declarePosition();
        ((RowBlockBuilder) pageBuilder.getBlockBuilder(PROTOCOL_BLOCK_CHANNEL)).buildEntry(fieldBuilders -> {
            int fieldId = 0;
            writeLong(fieldBuilders.get(fieldId), entryType, fieldId, "minReaderVersion", (long) protocolEntry.getMinReaderVersion());
            fieldId++;

            writeLong(fieldBuilders.get(fieldId), entryType, fieldId, "minWriterVersion", (long) protocolEntry.getMinWriterVersion());
            fieldId++;

            if (protocolEntry.getReaderFeatures().isPresent()) {
                writeStringList(fieldBuilders.get(fieldId), entryType, fieldId, "readerFeatures", protocolEntry.getReaderFeatures().get().stream().collect(toImmutableList()));
                fieldId++;
            }

            if (protocolEntry.getWriterFeatures().isPresent()) {
                writeStringList(fieldBuilders.get(fieldId), entryType, fieldId, "writerFeatures", protocolEntry.getWriterFeatures().get().stream().collect(toImmutableList()));
            }
        });

        // null for others
        appendNullOtherBlocks(pageBuilder, PROTOCOL_BLOCK_CHANNEL);
    }

    private void writeTransactionEntry(PageBuilder pageBuilder, RowType entryType, TransactionEntry transactionEntry)
    {
        pageBuilder.declarePosition();
        ((RowBlockBuilder) pageBuilder.getBlockBuilder(TXN_BLOCK_CHANNEL)).buildEntry(fieldBuilders -> {
            writeString(fieldBuilders.get(0), entryType, 0, "appId", transactionEntry.getAppId());
            writeLong(fieldBuilders.get(1), entryType, 1, "version", transactionEntry.getVersion());
            writeLong(fieldBuilders.get(2), entryType, 2, "lastUpdated", transactionEntry.getLastUpdated());
        });

        // null for others
        appendNullOtherBlocks(pageBuilder, TXN_BLOCK_CHANNEL);
    }

    private void writeAddFileEntry(
            PageBuilder pageBuilder,
            RowType entryType,
            AddFileEntry addFileEntry,
            MetadataEntry metadataEntry,
            ProtocolEntry protocolEntry,
            List partitionColumns,
            List partitionValuesParsedFieldTypes,
            boolean writeStatsAsJson,
            boolean writeStatsAsStruct)
    {
        pageBuilder.declarePosition();
        RowBlockBuilder blockBuilder = (RowBlockBuilder) pageBuilder.getBlockBuilder(ADD_BLOCK_CHANNEL);
        blockBuilder.buildEntry(fieldBuilders -> {
            int fieldId = 0;
            writeString(fieldBuilders.get(fieldId), entryType, fieldId, "path", addFileEntry.getPath());
            fieldId++;

            writeStringMap(fieldBuilders.get(fieldId), entryType, fieldId, "partitionValues", addFileEntry.getPartitionValues());
            fieldId++;

            writeLong(fieldBuilders.get(fieldId), entryType, fieldId, "size", addFileEntry.getSize());
            fieldId++;

            writeLong(fieldBuilders.get(fieldId), entryType, fieldId, "modificationTime", addFileEntry.getModificationTime());
            fieldId++;

            writeBoolean(fieldBuilders.get(fieldId), entryType, fieldId, "dataChange", addFileEntry.isDataChange());
            fieldId++;

            if (writeStatsAsJson) {
                writeJsonStats(fieldBuilders.get(fieldId), entryType, addFileEntry, metadataEntry, protocolEntry, fieldId);
                fieldId++;
            }

            if (writeStatsAsStruct) {
                if (!addFileEntry.getPartitionValues().isEmpty()) {
                    writeParsedPartitionValues(fieldBuilders.get(fieldId), entryType, addFileEntry, partitionColumns, partitionValuesParsedFieldTypes, fieldId);
                    fieldId++;
                }

                writeParsedStats(fieldBuilders.get(fieldId), entryType, addFileEntry, fieldId);
                fieldId++;
            }

            writeStringMap(fieldBuilders.get(fieldId), entryType, fieldId, "tags", addFileEntry.getTags());
        });

        // null for others
        appendNullOtherBlocks(pageBuilder, ADD_BLOCK_CHANNEL);
    }

    private void writeJsonStats(BlockBuilder entryBlockBuilder, RowType entryType, AddFileEntry addFileEntry, MetadataEntry metadataEntry, ProtocolEntry protocolEntry, int fieldId)
    {
        String statsJson = null;
        if (addFileEntry.getStats().isPresent()) {
            DeltaLakeFileStatistics statistics = addFileEntry.getStats().get();
            if (statistics instanceof DeltaLakeParquetFileStatistics parquetFileStatistics) {
                Map columnTypeMapping = getColumnTypeMapping(metadataEntry, protocolEntry);
                DeltaLakeJsonFileStatistics jsonFileStatistics = new DeltaLakeJsonFileStatistics(
                        parquetFileStatistics.getNumRecords(),
                        parquetFileStatistics.getMinValues().map(values -> toJsonValues(columnTypeMapping, values)),
                        parquetFileStatistics.getMaxValues().map(values -> toJsonValues(columnTypeMapping, values)),
                        parquetFileStatistics.getNullCount().map(nullCounts -> toNullCounts(columnTypeMapping, nullCounts)));
                statsJson = getStatsString(jsonFileStatistics).orElse(null);
            }
            else {
                statsJson = addFileEntry.getStatsString().orElse(null);
            }
        }
        writeString(entryBlockBuilder, entryType, fieldId, "stats", statsJson);
    }

    private Map getColumnTypeMapping(MetadataEntry deltaMetadata, ProtocolEntry protocolEntry)
    {
        return extractSchema(deltaMetadata, protocolEntry, typeManager).stream()
                .collect(toImmutableMap(DeltaLakeColumnMetadata::getPhysicalName, DeltaLakeColumnMetadata::getPhysicalColumnType));
    }

    private Optional getStatsString(DeltaLakeJsonFileStatistics parsedStats)
    {
        try {
            return Optional.of(serializeStatsAsJson(parsedStats));
        }
        catch (JsonProcessingException e) {
            return Optional.empty();
        }
    }

    private void writeParsedPartitionValues(
            BlockBuilder entryBlockBuilder,
            RowType entryType,
            AddFileEntry addFileEntry,
            List partitionColumns,
            List partitionValuesParsedFieldTypes,
            int fieldId)
    {
        RowType partitionValuesParsedType = getInternalRowType(entryType, fieldId, "partitionValues_parsed");
        ((RowBlockBuilder) entryBlockBuilder).buildEntry(fieldBuilders -> {
            for (int i = 0; i < partitionValuesParsedFieldTypes.size(); i++) {
                RowType.Field partitionValueField = partitionValuesParsedFieldTypes.get(i);
                String partitionColumnName = partitionValueField.getName().orElseThrow();
                String partitionValue = addFileEntry.getPartitionValues().get(partitionColumnName);
                validateAndGetField(partitionValuesParsedType, i, partitionColumnName);
                if (partitionValue == null) {
                    fieldBuilders.get(i).appendNull();
                    continue;
                }
                DeltaLakeColumnHandle partitionColumn = partitionColumns.get(i);
                Object deserializedPartitionValue = deserializePartitionValue(partitionColumn, Optional.of(partitionValue));
                writeNativeValue(partitionValueField.getType(), fieldBuilders.get(i), deserializedPartitionValue);
            }
        });
    }

    private void writeParsedStats(BlockBuilder entryBlockBuilder, RowType entryType, AddFileEntry addFileEntry, int fieldId)
    {
        RowType statsType = getInternalRowType(entryType, fieldId, "stats_parsed");
        if (addFileEntry.getStats().isEmpty()) {
            entryBlockBuilder.appendNull();
            return;
        }
        DeltaLakeFileStatistics stats = addFileEntry.getStats().get();
        ((RowBlockBuilder) entryBlockBuilder).buildEntry(fieldBuilders -> {
            if (stats instanceof DeltaLakeParquetFileStatistics) {
                writeLong(fieldBuilders.get(0), statsType, 0, "numRecords", stats.getNumRecords().orElse(null));
                writeMinMaxMapAsFields(fieldBuilders.get(1), statsType, 1, "minValues", stats.getMinValues(), false);
                writeMinMaxMapAsFields(fieldBuilders.get(2), statsType, 2, "maxValues", stats.getMaxValues(), false);
                writeNullCountAsFields(fieldBuilders.get(3), statsType, 3, "nullCount", stats.getNullCount());
            }
            else {
                int internalFieldId = 0;

                writeLong(fieldBuilders.get(internalFieldId), statsType, internalFieldId, "numRecords", stats.getNumRecords().orElse(null));
                internalFieldId++;

                if (statsType.getFields().stream().anyMatch(field -> field.getName().orElseThrow().equals("minValues"))) {
                    writeMinMaxMapAsFields(fieldBuilders.get(internalFieldId), statsType, internalFieldId, "minValues", stats.getMinValues(), true);
                    internalFieldId++;
                }
                if (statsType.getFields().stream().anyMatch(field -> field.getName().orElseThrow().equals("maxValues"))) {
                    writeMinMaxMapAsFields(fieldBuilders.get(internalFieldId), statsType, internalFieldId, "maxValues", stats.getMaxValues(), true);
                    internalFieldId++;
                }
                writeNullCountAsFields(fieldBuilders.get(internalFieldId), statsType, internalFieldId, "nullCount", stats.getNullCount());
            }
        });
    }

    private void writeMinMaxMapAsFields(BlockBuilder blockBuilder, RowType type, int fieldId, String fieldName, Optional> values, boolean isJson)
    {
        RowType.Field valuesField = validateAndGetField(type, fieldId, fieldName);
        RowType valuesFieldType = (RowType) valuesField.getType();
        writeObjectMapAsFields(blockBuilder, type, fieldId, fieldName, preprocessMinMaxValues(valuesFieldType, values, isJson));
    }

    private void writeNullCountAsFields(BlockBuilder blockBuilder, RowType type, int fieldId, String fieldName, Optional> values)
    {
        writeObjectMapAsFields(blockBuilder, type, fieldId, fieldName, preprocessNullCount(values));
    }

    private void writeObjectMapAsFields(BlockBuilder blockBuilder, RowType type, int fieldId, String fieldName, Optional> values)
    {
        if (values.isEmpty()) {
            blockBuilder.appendNull();
            return;
        }

        Field valuesField = validateAndGetField(type, fieldId, fieldName);
        List fields = ((RowType) valuesField.getType()).getFields();
        ((RowBlockBuilder) blockBuilder).buildEntry(fieldBuilders -> {
            for (int i = 0; i < fields.size(); i++) {
                Field field = fields.get(i);
                BlockBuilder fieldBlockBuilder = fieldBuilders.get(i);
                Object value = values.get().get(field.getName().orElseThrow());
                writeNativeValue(field.getType(), fieldBlockBuilder, value);
            }
        });
    }

    private Optional> preprocessMinMaxValues(RowType valuesType, Optional> valuesOptional, boolean isJson)
    {
        return valuesOptional.map(
                values -> {
                    Map fieldTypes = valuesType.getFields().stream().collect(toMap(
                            // anonymous row fields are not expected here
                            field -> field.getName().orElseThrow(),
                            RowType.Field::getType));

                    return values.entrySet().stream()
                            .collect(toMap(
                                    Map.Entry::getKey,
                                    entry -> {
                                        Type type = fieldTypes.get(entry.getKey());
                                        Object value = entry.getValue();
                                        if (isJson) {
                                            return jsonValueToTrinoValue(type, value);
                                        }
                                        if (type instanceof TimestampType) {
                                            // We need to remap TIMESTAMP WITH TIME ZONE -> TIMESTAMP here because of
                                            // inconsistency in what type is used for DL "timestamp" type in data processing and in min/max statistics map.
                                            value = multiplyExact(DateTimeEncoding.unpackMillisUtc((long) value), MICROSECONDS_PER_MILLISECOND);
                                        }
                                        return value;
                                    }));
                });
    }

    private Optional> preprocessNullCount(Optional> valuesOptional)
    {
        return valuesOptional.map(
                values ->
                        values.entrySet().stream()
                                .collect(toMap(
                                        Map.Entry::getKey,
                                        entry -> {
                                            Object value = entry.getValue();
                                            if (value instanceof Integer) {
                                                return (long) (int) value;
                                            }
                                            return value;
                                        })));
    }

    private void writeRemoveFileEntry(PageBuilder pageBuilder, RowType entryType, RemoveFileEntry removeFileEntry)
    {
        pageBuilder.declarePosition();
        ((RowBlockBuilder) pageBuilder.getBlockBuilder(REMOVE_BLOCK_CHANNEL)).buildEntry(fieldBuilders -> {
            writeString(fieldBuilders.get(0), entryType, 0, "path", removeFileEntry.getPath());
            writeLong(fieldBuilders.get(1), entryType, 1, "deletionTimestamp", removeFileEntry.getDeletionTimestamp());
            writeBoolean(fieldBuilders.get(2), entryType, 2, "dataChange", removeFileEntry.isDataChange());
        });

        // null for others
        appendNullOtherBlocks(pageBuilder, REMOVE_BLOCK_CHANNEL);
    }

    private void appendNullOtherBlocks(PageBuilder pageBuilder, int handledBlock)
    {
        for (int channel = 0; channel < CHANNELS_COUNT; ++channel) {
            if (channel == handledBlock) {
                continue;
            }
            pageBuilder.getBlockBuilder(channel).appendNull();
        }
    }

    private void writeString(BlockBuilder blockBuilder, RowType type, int fieldId, String fieldName, @Nullable String value)
    {
        RowType.Field field = validateAndGetField(type, fieldId, fieldName);
        if (value == null) {
            blockBuilder.appendNull();
            return;
        }
        field.getType().writeSlice(blockBuilder, utf8Slice(value));
    }

    private void writeLong(BlockBuilder blockBuilder, RowType type, int fieldId, String fieldName, @Nullable Long value)
    {
        RowType.Field field = validateAndGetField(type, fieldId, fieldName);
        if (value == null) {
            blockBuilder.appendNull();
            return;
        }
        field.getType().writeLong(blockBuilder, value);
    }

    private void writeBoolean(BlockBuilder blockBuilder, RowType type, int fieldId, String fieldName, boolean value)
    {
        RowType.Field field = validateAndGetField(type, fieldId, fieldName);
        field.getType().writeBoolean(blockBuilder, value);
    }

    private void writeStringMap(BlockBuilder blockBuilder, RowType type, int fieldId, String fieldName, @Nullable Map values)
    {
        RowType.Field field = validateAndGetField(type, fieldId, fieldName);
        checkArgument(field.getType() instanceof MapType, "Expected field %s/%s to by of MapType but got %s", fieldId, fieldName, field.getType());
        if (values == null) {
            blockBuilder.appendNull();
            return;
        }
        MapType mapType = (MapType) field.getType();
        ((MapBlockBuilder) blockBuilder).buildEntry((keyBlockBuilder, valueBlockBuilder) -> {
            for (Map.Entry entry : values.entrySet()) {
                mapType.getKeyType().writeSlice(keyBlockBuilder, utf8Slice(entry.getKey()));
                if (entry.getValue() == null) {
                    valueBlockBuilder.appendNull();
                }
                else {
                    mapType.getValueType().writeSlice(valueBlockBuilder, utf8Slice(entry.getValue()));
                }
            }
        });
    }

    private void writeStringList(BlockBuilder blockBuilder, RowType type, int fieldId, String fieldName, @Nullable List values)
    {
        RowType.Field field = validateAndGetField(type, fieldId, fieldName);
        checkArgument(field.getType() instanceof ArrayType, "Expected field %s/%s to by of ArrayType but got %s", fieldId, fieldName, field.getType());
        if (values == null) {
            blockBuilder.appendNull();
            return;
        }
        ArrayType arrayType = (ArrayType) field.getType();
        ((ArrayBlockBuilder) blockBuilder).buildEntry(elementBuilder -> {
            for (String value : values) {
                if (value == null) {
                    elementBuilder.appendNull();
                }
                else {
                    arrayType.getElementType().writeSlice(elementBuilder, utf8Slice(value));
                }
            }
        });
    }

    private RowType getInternalRowType(RowType type, int fieldId, String fieldName)
    {
        RowType.Field field = validateAndGetField(type, fieldId, fieldName);
        checkArgument(field.getType() instanceof RowType, "Expected field %s/%s to by of RowType but got %s", fieldId, fieldName, field.getType());
        return (RowType) field.getType();
    }

    private RowType.Field validateAndGetField(RowType type, int fieldId, String fieldName)
    {
        checkArgument(type.getFields().size() > fieldId, "Field %s/%s not found for type %s", fieldId, fieldName, type);
        RowType.Field field = type.getFields().get(fieldId);
        // anonymous row fields are not expected here
        checkArgument(field.getName().orElseThrow().equals(fieldName), "Expected %s for field %s but got %s for type %s", fieldName, fieldId, field.getName(), type);
        return field;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy