/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.parquet.predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceInput;
import io.trino.parquet.BloomFilterStore;
import io.trino.parquet.DictionaryPage;
import io.trino.parquet.ParquetCorruptionException;
import io.trino.parquet.ParquetDataSource;
import io.trino.parquet.ParquetDataSourceId;
import io.trino.parquet.ParquetEncoding;
import io.trino.parquet.ParquetReaderOptions;
import io.trino.parquet.metadata.BlockMetadata;
import io.trino.parquet.metadata.ColumnChunkMetadata;
import io.trino.parquet.metadata.PrunedBlockMetadata;
import io.trino.parquet.reader.RowGroupInfo;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.type.DecimalType;
import io.trino.spi.type.Type;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.statistics.Statistics;
import org.apache.parquet.format.DictionaryPageHeader;
import org.apache.parquet.format.PageHeader;
import org.apache.parquet.format.PageType;
import org.apache.parquet.format.Util;
import org.apache.parquet.internal.column.columnindex.OffsetIndex;
import org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore;
import org.apache.parquet.io.ParquetDecodingException;
import org.apache.parquet.schema.MessageType;
import org.joda.time.DateTimeZone;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.math.BigDecimal;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import static io.trino.parquet.BloomFilterStore.getBloomFilterStore;
import static io.trino.parquet.ParquetCompressionUtils.decompress;
import static io.trino.parquet.ParquetReaderUtils.isOnlyDictionaryEncodingPages;
import static io.trino.parquet.ParquetTypeUtils.getParquetEncoding;
import static io.trino.parquet.metadata.PrunedBlockMetadata.createPrunedColumnsMetadata;
import static io.trino.parquet.reader.TrinoColumnIndexStore.getColumnIndexStore;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.DateType.DATE;
import static io.trino.spi.type.IntegerType.INTEGER;
import static io.trino.spi.type.SmallintType.SMALLINT;
import static io.trino.spi.type.TinyintType.TINYINT;
import static java.lang.Math.toIntExact;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
public final class PredicateUtils
{
// Maximum size of dictionary that we will read for row-group pruning.
// Reading larger dictionaries is typically not beneficial. Before the dictionary is checked,
// the row-group statistics, page indexes and bloom filters have already been checked,
// and when the dictionary does not eliminate a row-group, the work done to
// decode the dictionary and match it against the predicate is wasted.
private static final int MAX_DICTIONARY_SIZE = 8096;
private PredicateUtils() {}
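// Returns true when the given min/max statistics, interpreted as values of the given Trino type,
// lie outside the range representable by that type; such statistics cannot be trusted for pruning.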
public static boolean isStatisticsOverflow(Type type, long min, long max)
{
if (type == TINYINT) {
return min < Byte.MIN_VALUE || max > Byte.MAX_VALUE;
}
if (type == SMALLINT) {
return min < Short.MIN_VALUE || max > Short.MAX_VALUE;
}
if (type == INTEGER || type == DATE) {
return min < Integer.MIN_VALUE || max > Integer.MAX_VALUE;
}
if (type == BIGINT) {
return false;
}
if (type instanceof DecimalType decimalType) {
if (!decimalType.isShort()) {
// Smallest long decimal type with 0 scale has broader range than representable in long, as used in ParquetLongStatistics
return false;
}
return BigDecimal.valueOf(min, decimalType.getScale()).compareTo(minimalValue(decimalType)) < 0 ||
BigDecimal.valueOf(max, decimalType.getScale()).compareTo(maximalValue(decimalType)) > 0;
}
throw new IllegalArgumentException("Unsupported type: " + type);
}
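// Smallest and largest values representable by a short decimal type, e.g. -99.99 and +99.99 for decimal(4, 2)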
private static BigDecimal minimalValue(DecimalType decimalType)
{
return new BigDecimal(format("-%s.%s", "9".repeat(decimalType.getPrecision() - decimalType.getScale()), "9".repeat(decimalType.getScale())));
}
private static BigDecimal maximalValue(DecimalType decimalType)
{
return new BigDecimal(format("+%s.%s", "9".repeat(decimalType.getPrecision() - decimalType.getScale()), "9".repeat(decimalType.getScale())));
}
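// Builds a TupleDomainParquetPredicate over the columns of the requested schema that have
// a descriptor in descriptorsByPath; columns without a matching descriptor are skipped.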
public static TupleDomainParquetPredicate buildPredicate(
MessageType requestedSchema,
TupleDomain<ColumnDescriptor> parquetTupleDomain,
Map<List<String>, ColumnDescriptor> descriptorsByPath,
DateTimeZone timeZone)
{
ImmutableList.Builder<ColumnDescriptor> columnReferences = ImmutableList.builder();
for (String[] paths : requestedSchema.getPaths()) {
ColumnDescriptor descriptor = descriptorsByPath.get(Arrays.asList(paths));
if (descriptor != null) {
columnReferences.add(descriptor);
}
}
return new TupleDomainParquetPredicate(parquetTupleDomain, columnReferences.build(), timeZone);
}
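// Determines whether a row-group can possibly contain rows matching the predicate.
// Checks are ordered from cheapest to most expensive: row-group statistics first,
// then column indexes, bloom filters and finally dictionaries for the candidate columns.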
public static boolean predicateMatches(
TupleDomainParquetPredicate parquetPredicate,
PrunedBlockMetadata columnsMetadata,
ParquetDataSource dataSource,
Map<List<String>, ColumnDescriptor> descriptorsByPath,
TupleDomain<ColumnDescriptor> parquetTupleDomain,
Optional<ColumnIndexStore> columnIndexStore,
Optional<BloomFilterStore> bloomFilterStore,
DateTimeZone timeZone,
int domainCompactionThreshold)
throws IOException
{
if (columnsMetadata.getRowCount() == 0) {
return false;
}
Map<ColumnDescriptor, Statistics<?>> columnStatistics = getStatistics(columnsMetadata, descriptorsByPath);
Map<ColumnDescriptor, Long> columnValueCounts = getColumnValueCounts(columnsMetadata, descriptorsByPath);
Optional<List<ColumnDescriptor>> candidateColumns = parquetPredicate.getIndexLookupCandidates(columnValueCounts, columnStatistics, dataSource.getId());
if (candidateColumns.isEmpty()) {
return false;
}
if (candidateColumns.get().isEmpty()) {
return true;
}
// Perform column index, bloom filter checks and dictionary lookups only for the subset of columns where they can be useful.
// This prevents unnecessary filesystem reads and decoding work when the predicate on a column comes from
// file-level min/max stats or more generally when the predicate selects a range equal to or wider than row-group min/max.
TupleDomainParquetPredicate indexPredicate = new TupleDomainParquetPredicate(parquetTupleDomain, candidateColumns.get(), timeZone);
// Page stats are finer grained but relatively more expensive, so we do this filtering after the block-level filtering above.
if (columnIndexStore.isPresent() && !indexPredicate.matches(columnValueCounts, columnIndexStore.get(), dataSource.getId())) {
return false;
}
if (bloomFilterStore.isPresent() && !indexPredicate.matches(bloomFilterStore.get(), domainCompactionThreshold)) {
return false;
}
return dictionaryPredicatesMatch(
indexPredicate,
columnsMetadata,
dataSource,
descriptorsByPath,
ImmutableSet.copyOf(candidateColumns.get()),
columnIndexStore);
}
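// Returns metadata for the row-groups that start within the split and that at least one of the
// given predicates cannot eliminate, along with the row offset of each row-group within the file.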
public static List<RowGroupInfo> getFilteredRowGroups(
long splitStart,
long splitLength,
ParquetDataSource dataSource,
List<BlockMetadata> blocksMetaData,
List<TupleDomain<ColumnDescriptor>> parquetTupleDomains,
List<TupleDomainParquetPredicate> parquetPredicates,
Map<List<String>, ColumnDescriptor> descriptorsByPath,
DateTimeZone timeZone,
int domainCompactionThreshold,
ParquetReaderOptions options)
throws IOException
{
long fileRowCount = 0;
ImmutableList.Builder<RowGroupInfo> rowGroupInfoBuilder = ImmutableList.builder();
for (BlockMetadata block : blocksMetaData) {
long blockStart = block.getStartingPos();
boolean splitContainsBlock = splitStart <= blockStart && blockStart < splitStart + splitLength;
if (splitContainsBlock) {
for (int i = 0; i < parquetTupleDomains.size(); i++) {
TupleDomain<ColumnDescriptor> parquetTupleDomain = parquetTupleDomains.get(i);
TupleDomainParquetPredicate parquetPredicate = parquetPredicates.get(i);
Optional<ColumnIndexStore> columnIndex = getColumnIndexStore(dataSource, block, descriptorsByPath, parquetTupleDomain, options);
Optional<BloomFilterStore> bloomFilterStore = getBloomFilterStore(dataSource, block, parquetTupleDomain, options);
PrunedBlockMetadata columnsMetadata = createPrunedColumnsMetadata(block, dataSource.getId(), descriptorsByPath);
if (predicateMatches(
parquetPredicate,
columnsMetadata,
dataSource,
descriptorsByPath,
parquetTupleDomain,
columnIndex,
bloomFilterStore,
timeZone,
domainCompactionThreshold)) {
rowGroupInfoBuilder.add(new RowGroupInfo(columnsMetadata, fileRowCount, columnIndex));
break;
}
}
}
fileRowCount += block.rowCount();
}
return rowGroupInfoBuilder.build();
}
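// Collects the column chunk statistics for the selected columns of a row-group,
// omitting columns for which the file provides no statistics.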
private static Map<ColumnDescriptor, Statistics<?>> getStatistics(PrunedBlockMetadata columnsMetadata, Map<List<String>, ColumnDescriptor> descriptorsByPath)
throws ParquetCorruptionException
{
ImmutableMap.Builder<ColumnDescriptor, Statistics<?>> statistics = ImmutableMap.builderWithExpectedSize(descriptorsByPath.size());
for (ColumnDescriptor descriptor : descriptorsByPath.values()) {
ColumnChunkMetadata columnMetaData = columnsMetadata.getColumnChunkMetaData(descriptor);
Statistics<?> columnStatistics = columnMetaData.getStatistics();
if (columnStatistics != null) {
statistics.put(descriptor, columnStatistics);
}
}
return statistics.buildOrThrow();
}
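// Collects the value count of each selected column chunk in a row-group.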
private static Map<ColumnDescriptor, Long> getColumnValueCounts(PrunedBlockMetadata columnsMetadata, Map<List<String>, ColumnDescriptor> descriptorsByPath)
throws ParquetCorruptionException
{
ImmutableMap.Builder<ColumnDescriptor, Long> columnValueCounts = ImmutableMap.builderWithExpectedSize(descriptorsByPath.size());
for (ColumnDescriptor descriptor : descriptorsByPath.values()) {
ColumnChunkMetadata columnMetaData = columnsMetadata.getColumnChunkMetaData(descriptor);
columnValueCounts.put(descriptor, columnMetaData.getValueCount());
}
return columnValueCounts.buildOrThrow();
}
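// Returns false only when some candidate column is entirely dictionary encoded and neither its
// dictionary values nor null (when nulls are present) can satisfy the predicate; otherwise the
// row-group has to be read.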
private static boolean dictionaryPredicatesMatch(
TupleDomainParquetPredicate parquetPredicate,
PrunedBlockMetadata columnsMetadata,
ParquetDataSource dataSource,
Map<List<String>, ColumnDescriptor> descriptorsByPath,
Set<ColumnDescriptor> candidateColumns,
Optional<ColumnIndexStore> columnIndexStore)
throws IOException
{
for (ColumnDescriptor descriptor : descriptorsByPath.values()) {
ColumnChunkMetadata columnMetaData = columnsMetadata.getColumnChunkMetaData(descriptor);
if (!candidateColumns.contains(descriptor)) {
continue;
}
if (isOnlyDictionaryEncodingPages(columnMetaData)) {
Statistics<?> columnStatistics = columnMetaData.getStatistics();
boolean nullAllowed = columnStatistics == null || columnStatistics.getNumNulls() != 0;
// Early abort: the predicate already filters out the block, so no more dictionaries need to be read
if (!parquetPredicate.matches(new DictionaryDescriptor(
descriptor,
nullAllowed,
readDictionaryPage(dataSource, columnMetaData, columnIndexStore)))) {
return false;
}
}
}
return true;
}
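// Reads the dictionary page of a column chunk, if one is present and small enough to be worth
// decoding; its size is derived from the column chunk offsets or from the offset index.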
private static Optional<DictionaryPage> readDictionaryPage(
ParquetDataSource dataSource,
ColumnChunkMetadata columnMetaData,
Optional<ColumnIndexStore> columnIndexStore)
throws IOException
{
int dictionaryPageSize;
if (columnMetaData.getDictionaryPageOffset() == 0 || columnMetaData.getFirstDataPageOffset() <= columnMetaData.getDictionaryPageOffset()) {
/*
* See org.apache.parquet.hadoop.Offsets for reference.
* The offsets might not contain the proper values in the below cases:
* - The dictionaryPageOffset might not be set; in this case 0 is returned
* (0 cannot be a valid offset because of the MAGIC bytes)
* - The firstDataPageOffset might point to the dictionary page
*
* Such parquet files may have been produced by parquet-mr writers before PARQUET-1977.
* We find the dictionary page size from OffsetIndex if that exists,
* otherwise fallback to reading the full column chunk.
*/
dictionaryPageSize = columnIndexStore.flatMap(index -> getDictionaryPageSize(index, columnMetaData))
.orElseGet(() -> toIntExact(columnMetaData.getTotalSize()));
}
else {
dictionaryPageSize = toIntExact(columnMetaData.getFirstDataPageOffset() - columnMetaData.getDictionaryPageOffset());
}
// Get the dictionary page header and the dictionary in a single read
Slice buffer = dataSource.readFully(columnMetaData.getStartingPos(), dictionaryPageSize);
return readPageHeaderWithData(buffer.getInput()).map(data -> decodeDictionaryPage(dataSource.getId(), data, columnMetaData));
}
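// Derives the dictionary page size from the offset index: when the first data page starts after
// the beginning of the column chunk, the gap in between is occupied by the dictionary page.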
private static Optional<Integer> getDictionaryPageSize(ColumnIndexStore columnIndexStore, ColumnChunkMetadata columnMetaData)
{
OffsetIndex offsetIndex = columnIndexStore.getOffsetIndex(columnMetaData.getPath());
if (offsetIndex == null) {
return Optional.empty();
}
long rowGroupOffset = columnMetaData.getStartingPos();
long firstPageOffset = offsetIndex.getOffset(0);
if (rowGroupOffset < firstPageOffset) {
return Optional.of(toIntExact(firstPageOffset - rowGroupOffset));
}
return Optional.empty();
}
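// Parses a page header from the input and returns it together with the compressed page bytes,
// but only for dictionary pages with at most MAX_DICTIONARY_SIZE values.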
private static Optional<PageHeaderWithData> readPageHeaderWithData(SliceInput inputStream)
{
PageHeader pageHeader;
try {
pageHeader = Util.readPageHeader(inputStream);
}
catch (IOException e) {
throw new UncheckedIOException(e);
}
if (pageHeader.type != PageType.DICTIONARY_PAGE) {
return Optional.empty();
}
DictionaryPageHeader dictionaryHeader = pageHeader.getDictionary_page_header();
if (dictionaryHeader.getNum_values() > MAX_DICTIONARY_SIZE) {
return Optional.empty();
}
return Optional.of(new PageHeaderWithData(
pageHeader,
inputStream.readSlice(pageHeader.getCompressed_page_size())));
}
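// Decompresses the dictionary page payload and wraps it in a DictionaryPage together with its
// value count and encoding.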
private static DictionaryPage decodeDictionaryPage(ParquetDataSourceId dataSourceId, PageHeaderWithData pageHeaderWithData, ColumnChunkMetadata chunkMetaData)
{
PageHeader pageHeader = pageHeaderWithData.pageHeader();
DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header();
ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dicHeader.getEncoding().name()));
int dictionarySize = dicHeader.getNum_values();
Slice compressedData = pageHeaderWithData.compressedData();
try {
return new DictionaryPage(decompress(dataSourceId, chunkMetaData.getCodec().getParquetCompressionCodec(), compressedData, pageHeader.getUncompressed_page_size()), dictionarySize, encoding);
}
catch (IOException e) {
throw new ParquetDecodingException("Could not decode the dictionary for " + chunkMetaData.getPath(), e);
}
}
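// Pairs a parsed dictionary page header with the raw (still compressed) page bytes that follow it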
private record PageHeaderWithData(PageHeader pageHeader, Slice compressedData)
{
private PageHeaderWithData(PageHeader pageHeader, Slice compressedData)
{
this.pageHeader = requireNonNull(pageHeader, "pageHeader is null");
this.compressedData = requireNonNull(compressedData, "compressedData is null");
}
}
}