All downloads are free. Search and download functionality uses the official Maven repository.

io.trino.parquet.ColumnStatisticsValidation Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.parquet;

import com.google.common.collect.ImmutableList;
import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
import io.trino.spi.block.ColumnarArray;
import io.trino.spi.block.ColumnarMap;
import io.trino.spi.block.RowBlock;
import io.trino.spi.type.ArrayType;
import io.trino.spi.type.MapType;
import io.trino.spi.type.RowType;
import io.trino.spi.type.Type;

import java.util.List;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED;
import static io.trino.spi.block.ColumnarArray.toColumnarArray;
import static io.trino.spi.block.ColumnarMap.toColumnarMap;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

/**
 * Accumulates value counts for every leaf field of a (possibly nested) type
 * from the blocks passed to {@link #addBlock(Block)}.
 * <p>
 * An instance mirrors the shape of its {@link Type}: it holds one child builder per
 * type parameter, and a type without type parameters is treated as a leaf. Counts
 * contributed by null or empty containers (arrays, maps, rows) are pushed down to
 * every leaf underneath them.
 * <p>
 * NOTE(review): judging by the package and class name, this is presumably used to
 * cross-check statistics of written Parquet columns; confirm against the callers.
 * <p>
 * The class keeps mutable counters and performs no synchronization.
 */
class ColumnStatisticsValidation
{
    private final Type type;
    // One builder per type parameter of `type`; an empty list marks this node as a leaf
    private final List<ColumnStatisticsValidation> fieldBuilders;

    // Running totals; only updated when this node is a leaf
    private long valuesCount;
    private long nonLeafValuesCount;

    /**
     * Creates a builder for {@code type} and, recursively, for each of its type parameters.
     *
     * @param type the type of the blocks that will be added; must not be null
     */
    public ColumnStatisticsValidation(Type type)
    {
        this.type = requireNonNull(type, "type is null");
        this.fieldBuilders = type.getTypeParameters().stream()
                .map(ColumnStatisticsValidation::new)
                .collect(toImmutableList());
    }

    /**
     * Adds the contents of {@code block} to the accumulated counts.
     *
     * @param block a block whose layout matches the type given to the constructor
     * @throws TrinoException with {@code NOT_SUPPORTED} if a type with type parameters
     * is neither an array, a map nor a row
     */
    public void addBlock(Block block)
    {
        // The top-level block has no enclosing container, so nothing is inherited
        addBlock(block, new ColumnStatistics(0, 0));
    }

    /**
     * Returns the accumulated counts, one entry per leaf field, in depth-first
     * order of the type parameters.
     *
     * @return an immutable list of per-leaf statistics
     */
    public List<ColumnStatistics> build()
    {
        if (fieldBuilders.isEmpty()) {
            return ImmutableList.of(new ColumnStatistics(valuesCount, nonLeafValuesCount));
        }
        return fieldBuilders.stream()
                .flatMap(builder -> builder.build().stream())
                .collect(toImmutableList());
    }

    /**
     * Adds {@code block} to this node.
     * <p>
     * For a leaf, the block's own counts plus the inherited {@code columnStatistics}
     * are added to the running totals. For a container, the counts of null/empty
     * entries at this level are merged with the inherited ones and handed down to
     * each child builder together with the corresponding child block.
     *
     * @param block the block for this node
     * @param columnStatistics counts contributed by the enclosing containers
     */
    private void addBlock(Block block, ColumnStatistics columnStatistics)
    {
        if (fieldBuilders.isEmpty()) {
            addPrimitiveBlock(block);
            valuesCount += columnStatistics.valuesCount();
            nonLeafValuesCount += columnStatistics.nonLeafValuesCount();
            return;
        }

        // Child blocks, in the same order as fieldBuilders
        List<Block> fields;
        ColumnStatistics mergedColumnStatistics;
        if (type instanceof ArrayType) {
            ColumnarArray columnarArray = toColumnarArray(block);
            fields = ImmutableList.of(columnarArray.getElementsBlock());
            mergedColumnStatistics = columnStatistics.merge(addArrayBlock(columnarArray));
        }
        else if (type instanceof MapType) {
            ColumnarMap columnarMap = toColumnarMap(block);
            fields = ImmutableList.of(columnarMap.getKeysBlock(), columnarMap.getValuesBlock());
            mergedColumnStatistics = columnStatistics.merge(addMapBlock(columnarMap));
        }
        else if (type instanceof RowType) {
            // the validation code is designed to work with null-suppressed blocks
            fields = RowBlock.getNullSuppressedRowFieldsFromBlock(block);
            mergedColumnStatistics = columnStatistics.merge(addRowBlock(block));
        }
        else {
            throw new TrinoException(NOT_SUPPORTED, format("Unsupported type: %s", type));
        }

        for (int i = 0; i < fieldBuilders.size(); i++) {
            fieldBuilders.get(i).addBlock(fields.get(i), mergedColumnStatistics);
        }
    }

    /**
     * Adds every position of a leaf block to {@code valuesCount} and every null
     * position to {@code nonLeafValuesCount}.
     */
    private void addPrimitiveBlock(Block block)
    {
        valuesCount += block.getPositionCount();
        // Skip the per-position scan when the block reports it cannot contain nulls
        if (!block.mayHaveNull()) {
            return;
        }
        int nullsCount = 0;
        for (int position = 0; position < block.getPositionCount(); position++) {
            nullsCount += block.isNull(position) ? 1 : 0;
        }
        nonLeafValuesCount += nullsCount;
    }

    /**
     * Counts map positions that are null or have no entries. The count is reported as
     * both the values count and the non-leaf values count.
     */
    private static ColumnStatistics addMapBlock(ColumnarMap block)
    {
        if (!block.mayHaveNull()) {
            // No nulls possible: only empty maps need to be counted
            int emptyEntriesCount = 0;
            for (int position = 0; position < block.getPositionCount(); position++) {
                emptyEntriesCount += block.getEntryCount(position) == 0 ? 1 : 0;
            }
            return new ColumnStatistics(emptyEntriesCount, emptyEntriesCount);
        }
        int nonLeafValuesCount = 0;
        for (int position = 0; position < block.getPositionCount(); position++) {
            nonLeafValuesCount += block.isNull(position) || block.getEntryCount(position) == 0 ? 1 : 0;
        }
        return new ColumnStatistics(nonLeafValuesCount, nonLeafValuesCount);
    }

    /**
     * Counts array positions that are null or have zero length. The count is reported as
     * both the values count and the non-leaf values count.
     */
    private static ColumnStatistics addArrayBlock(ColumnarArray block)
    {
        if (!block.mayHaveNull()) {
            // No nulls possible: only empty arrays need to be counted
            int emptyEntriesCount = 0;
            for (int position = 0; position < block.getPositionCount(); position++) {
                emptyEntriesCount += block.getLength(position) == 0 ? 1 : 0;
            }
            return new ColumnStatistics(emptyEntriesCount, emptyEntriesCount);
        }
        int nonLeafValuesCount = 0;
        for (int position = 0; position < block.getPositionCount(); position++) {
            nonLeafValuesCount += block.isNull(position) || block.getLength(position) == 0 ? 1 : 0;
        }
        return new ColumnStatistics(nonLeafValuesCount, nonLeafValuesCount);
    }

    /**
     * Counts null row positions. The count is reported as both the values count and
     * the non-leaf values count; a block that cannot contain nulls contributes zero.
     */
    private static ColumnStatistics addRowBlock(Block block)
    {
        if (!block.mayHaveNull()) {
            return new ColumnStatistics(0, 0);
        }
        int nullsCount = 0;
        for (int position = 0; position < block.getPositionCount(); position++) {
            nullsCount += block.isNull(position) ? 1 : 0;
        }
        return new ColumnStatistics(nullsCount, nullsCount);
    }

    /**
     * @param valuesCount Count of values for a column field, including nulls, empty and defined values.
     * @param nonLeafValuesCount Count of non-leaf values for a column field, this is nulls count for primitives
     * and count of values below the max definition level for nested types
     */
    record ColumnStatistics(long valuesCount, long nonLeafValuesCount)
    {
        /**
         * Returns a new instance holding the field-wise sum of this and {@code other}.
         */
        ColumnStatistics merge(ColumnStatistics other)
        {
            return new ColumnStatistics(
                    valuesCount + other.valuesCount(),
                    nonLeafValuesCount + other.nonLeafValuesCount());
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy