All downloads are free. Search and download functionality uses the official Maven repository.

com.facebook.presto.hive.orc.AggregatedOrcPageSource Maven / Gradle / Ivy

The newest version!
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive.orc;

import com.facebook.presto.common.Page;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.type.Decimals;
import com.facebook.presto.common.type.FixedWidthType;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.hive.HiveColumnHandle;
import com.facebook.presto.hive.HiveType;
import com.facebook.presto.orc.metadata.Footer;
import com.facebook.presto.orc.metadata.OrcType;
import com.facebook.presto.orc.metadata.statistics.ColumnStatistics;
import com.facebook.presto.spi.ConnectorPageSource;
import com.facebook.presto.spi.function.FunctionHandle;
import com.facebook.presto.spi.function.StandardFunctionResolution;
import io.airlift.slice.Slice;

import java.io.IOException;
import java.math.BigDecimal;
import java.util.List;

import static com.facebook.presto.common.type.IntegerType.INTEGER;
import static com.facebook.presto.spi.plan.AggregationNode.Aggregation;
import static java.lang.Float.floatToRawIntBits;
import static java.util.Objects.requireNonNull;

/**
 * A {@link ConnectorPageSource} that answers pushed-down partial aggregations
 * (count, min, max) from the statistics stored in an ORC file footer.
 * <p>
 * It produces exactly one page holding a single row, with one block per column
 * handle, and then reports itself as finished.
 */
public class AggregatedOrcPageSource
        implements ConnectorPageSource
{
    // Prepare the one required record by looking at the aggregations and stats in footer
    private static final int BATCH_SIZE = 1;

    private static final String ORC_NO_MIN_MAX_MESSAGE =
            "No min/max found for orc file. Set session property hive.pushdown_partial_aggregations_into_scan=false and execute query again";

    private final List<HiveColumnHandle> columnHandles;
    private final Footer footer;
    private final TypeManager typeManager;
    private final StandardFunctionResolution functionResolution;

    private boolean completed;
    private long readTimeNanos;
    private long completedBytes;

    /**
     * Creates a page source over the given footer.
     *
     * @param columnHandles one handle per output block; each is read for its partial aggregation in {@link #getNextPage()}
     * @param footer ORC footer supplying row count, types and file-level column statistics
     * @param typeManager used to resolve the Presto type of each column handle
     * @param functionResolution used to classify the aggregation function (count / min / max)
     * @throws NullPointerException if any argument is null
     */
    public AggregatedOrcPageSource(List<HiveColumnHandle> columnHandles, Footer footer, TypeManager typeManager, StandardFunctionResolution functionResolution)
    {
        this.columnHandles = requireNonNull(columnHandles, "columnHandles is null");
        this.footer = requireNonNull(footer, "footer is null");
        this.typeManager = requireNonNull(typeManager, "typeManager is null");
        this.functionResolution = requireNonNull(functionResolution, "functionResolution is null");
    }

    @Override
    public long getCompletedBytes()
    {
        return completedBytes;
    }

    @Override
    public long getCompletedPositions()
    {
        return 0;
    }

    @Override
    public long getReadTimeNanos()
    {
        return readTimeNanos;
    }

    @Override
    public boolean isFinished()
    {
        return completed;
    }

    /**
     * Builds the single result page from footer statistics.
     *
     * @return a one-row page on the first call, {@code null} on every later call
     * @throws UnsupportedOperationException if an aggregation is not count/min/max,
     *         or if the footer lacks the statistic needed to answer it
     * @throws IllegalArgumentException if min/max is requested on an unsupported ORC type
     */
    @Override
    public Page getNextPage()
    {
        if (completed) {
            return null;
        }

        long start = System.nanoTime();
        Block[] blocks = new Block[columnHandles.size()];
        for (int fieldId = 0; fieldId < blocks.length; fieldId++) {
            HiveColumnHandle columnHandle = columnHandles.get(fieldId);
            Aggregation aggregation = columnHandle.getPartialAggregation().get();
            int columnIndex = columnHandle.getHiveColumnIndex();
            Type type = typeManager.getType(columnHandle.getTypeSignature());
            BlockBuilder blockBuilder = type.createBlockBuilder(null, BATCH_SIZE, 0);
            FunctionHandle functionHandle = aggregation.getFunctionHandle();

            if (functionResolution.isCountFunction(functionHandle)) {
                if (aggregation.getArguments().isEmpty()) {
                    // count(*): every row in the file counts
                    blockBuilder.writeLong(footer.getNumberOfRows());
                }
                else {
                    // count(column): only non-null values count
                    writeNonNullCount(columnIndex, blockBuilder);
                }
                // NOTE(review): the count is written via writeLong but accounted with
                // INTEGER's fixed size; confirm whether this is intentional.
                completedBytes += INTEGER.getFixedSize();
            }
            else if (functionResolution.isMaxFunction(functionHandle)) {
                writeMinMax(columnIndex, type, columnHandle.getHiveType(), blockBuilder, false);
            }
            else if (functionResolution.isMinFunction(functionHandle)) {
                writeMinMax(columnIndex, type, columnHandle.getHiveType(), blockBuilder, true);
            }
            else {
                throw new UnsupportedOperationException(aggregation.getFunctionHandle().toString() + " is not supported");
            }
            blocks[fieldId] = blockBuilder.build();
        }

        completed = true;
        readTimeNanos += System.nanoTime() - start;
        return new Page(BATCH_SIZE, blocks);
    }

    /**
     * Writes the file-level minimum or maximum of a column into {@code blockBuilder}.
     * Statistics and types are looked up at {@code columnIndex + 1} in the footer lists.
     *
     * @param columnIndex hive column index of the aggregated column
     * @param type Presto type of the output block
     * @param hiveType declared hive type, used to distinguish short from long decimals
     * @param blockBuilder destination for the single value
     * @param isMin {@code true} to write the minimum, {@code false} for the maximum
     * @throws UnsupportedOperationException if the footer has no min/max for the column
     * @throws IllegalArgumentException if the ORC type kind has no min/max support here
     */
    private void writeMinMax(int columnIndex, Type type, HiveType hiveType, BlockBuilder blockBuilder, boolean isMin)
    {
        ColumnStatistics columnStatistics = footer.getFileStats().get(columnIndex + 1);
        OrcType orcType = footer.getTypes().get(columnIndex + 1);

        if (type instanceof FixedWidthType) {
            completedBytes += ((FixedWidthType) type).getFixedSize();
        }

        // Each typed statistics object is passed through requireStatistic as well as the
        // min/max value itself. This assumes the typed getters may return null when the
        // file carries no statistics of that kind (TODO confirm against ColumnStatistics);
        // the check is harmless if they never do.
        switch (orcType.getOrcTypeKind()) {
            case SHORT:
            case INT:
            case LONG: {
                Long value = isMin
                        ? requireStatistic(columnStatistics.getIntegerStatistics()).getMin()
                        : requireStatistic(columnStatistics.getIntegerStatistics()).getMax();
                blockBuilder.writeLong(requireStatistic(value));
                break;
            }

            case TIMESTAMP:
            case DATE: {
                Integer value = isMin
                        ? requireStatistic(columnStatistics.getDateStatistics()).getMin()
                        : requireStatistic(columnStatistics.getDateStatistics()).getMax();
                blockBuilder.writeLong(requireStatistic(value).longValue());
                break;
            }

            case VARCHAR:
            case CHAR:
            case STRING: {
                Slice value = requireStatistic(isMin
                        ? requireStatistic(columnStatistics.getStringStatistics()).getMin()
                        : requireStatistic(columnStatistics.getStringStatistics()).getMax());
                blockBuilder.writeBytes(value, 0, value.length()).closeEntry();
                // Variable-width value: account for the bytes actually written
                completedBytes += value.length();
                break;
            }

            case FLOAT: {
                Double value = isMin
                        ? requireStatistic(columnStatistics.getDoubleStatistics()).getMin()
                        : requireStatistic(columnStatistics.getDoubleStatistics()).getMax();
                // Statistics hold the value as a double; narrow it and write its raw int bits
                blockBuilder.writeLong(floatToRawIntBits(requireStatistic(value).floatValue()));
                break;
            }

            case DOUBLE: {
                Double value = isMin
                        ? requireStatistic(columnStatistics.getDoubleStatistics()).getMin()
                        : requireStatistic(columnStatistics.getDoubleStatistics()).getMax();
                type.writeDouble(blockBuilder, requireStatistic(value));
                break;
            }

            case DECIMAL: {
                BigDecimal value = requireStatistic(isMin
                        ? requireStatistic(columnStatistics.getDecimalStatistics()).getMin()
                        : requireStatistic(columnStatistics.getDecimalStatistics()).getMax());
                Type definedType = hiveType.getType(typeManager);
                if (Decimals.isShortDecimal(definedType)) {
                    blockBuilder.writeLong(value.unscaledValue().longValue());
                }
                else {
                    type.writeSlice(blockBuilder, Decimals.encodeUnscaledValue(value.unscaledValue()));
                }
                break;
            }

            case BYTE:
            case BOOLEAN:
            case BINARY:
            case UNION:
            case LIST:
            case STRUCT:
            case MAP:
            default:
                throw new IllegalArgumentException("Unsupported type: " + orcType.getOrcTypeKind());
        }
    }

    /**
     * Returns {@code statistic} unchanged, or fails with the standard "no min/max"
     * message when it is {@code null}.
     *
     * @throws UnsupportedOperationException if {@code statistic} is null
     */
    private static <T> T requireStatistic(T statistic)
    {
        if (statistic == null) {
            throw new UnsupportedOperationException(ORC_NO_MIN_MAX_MESSAGE);
        }
        return statistic;
    }

    /**
     * Writes the number of values recorded in the file-level statistics of a column.
     *
     * @param columnIndex hive column index; statistics are read at {@code columnIndex + 1}
     * @param blockBuilder destination for the count
     * @throws UnsupportedOperationException if the statistics carry no number of values
     */
    private void writeNonNullCount(int columnIndex, BlockBuilder blockBuilder)
    {
        ColumnStatistics columnStatistics = footer.getFileStats().get(columnIndex + 1);
        if (!columnStatistics.hasNumberOfValues()) {
            throw new UnsupportedOperationException("Number of values not set for orc file. Set session property hive.pushdown_partial_aggregations_into_scan=false and execute query again");
        }
        blockBuilder.writeLong(columnStatistics.getNumberOfValues());
    }

    @Override
    public long getSystemMemoryUsage()
    {
        return 0;
    }

    @Override
    public void close()
            throws IOException
    {
        // no-op
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy