All Downloads are FREE. Search and download functionality uses the official Maven repository.

io.trino.parquet.metadata.PrunedBlockMetadata Maven / Gradle / Ivy

There is a newer version: 464
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.parquet.metadata;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.trino.parquet.ParquetCorruptionException;
import io.trino.parquet.ParquetDataSourceId;
import org.apache.parquet.column.ColumnDescriptor;

import java.util.List;
import java.util.Map;
import java.util.Set;

import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static java.util.Arrays.asList;
import static java.util.function.Function.identity;

public final class PrunedBlockMetadata
{
    /**
     * Stores only the necessary columns metadata from BlockMetadata and indexes them by path for efficient look-ups
     */
    public static PrunedBlockMetadata createPrunedColumnsMetadata(BlockMetadata blockMetadata, ParquetDataSourceId dataSourceId, Map, ColumnDescriptor> descriptorsByPath)
            throws ParquetCorruptionException
    {
        Set> requiredPaths = descriptorsByPath.keySet();
        Map, ColumnChunkMetadata> columnMetadataByPath = blockMetadata.columns().stream()
                .collect(toImmutableMap(
                        column -> asList(column.getPath().toArray()),
                        identity(),
                        // Same column name may occur more than once when the file is written by case-sensitive tools
                        (oldValue, _) -> oldValue));
        ImmutableMap.Builder, ColumnChunkMetadata> columnMetadataByPathBuilder = ImmutableMap.builderWithExpectedSize(requiredPaths.size());
        for (Map.Entry, ColumnDescriptor> entry : descriptorsByPath.entrySet()) {
            List requiredPath = entry.getKey();
            ColumnDescriptor columnDescriptor = entry.getValue();
            ColumnChunkMetadata columnChunkMetadata = columnMetadataByPath.get(requiredPath);
            if (columnChunkMetadata == null) {
                throw new ParquetCorruptionException(dataSourceId, "Metadata is missing for column: %s", columnDescriptor);
            }
            columnMetadataByPathBuilder.put(requiredPath, columnChunkMetadata);
        }
        return new PrunedBlockMetadata(blockMetadata.rowCount(), dataSourceId, columnMetadataByPathBuilder.buildOrThrow());
    }

    private final long rowCount;
    private final ParquetDataSourceId dataSourceId;
    private final Map, ColumnChunkMetadata> columnMetadataByPath;

    private PrunedBlockMetadata(long rowCount, ParquetDataSourceId dataSourceId, Map, ColumnChunkMetadata> columnMetadataByPath)
    {
        this.rowCount = rowCount;
        this.dataSourceId = dataSourceId;
        this.columnMetadataByPath = columnMetadataByPath;
    }

    public long getRowCount()
    {
        return rowCount;
    }

    public List getColumns()
    {
        return ImmutableList.copyOf(columnMetadataByPath.values());
    }

    public ColumnChunkMetadata getColumnChunkMetaData(ColumnDescriptor columnDescriptor)
            throws ParquetCorruptionException
    {
        ColumnChunkMetadata columnChunkMetadata = columnMetadataByPath.get(asList(columnDescriptor.getPath()));
        if (columnChunkMetadata == null) {
            throw new ParquetCorruptionException(dataSourceId, "Metadata is missing for column: %s", columnDescriptor);
        }
        return columnChunkMetadata;
    }

    @Override
    public String toString()
    {
        return toStringHelper(this)
                .add("rowCount", rowCount)
                .add("columnMetadataByPath", columnMetadataByPath)
                .toString();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy