All downloads are FREE. Search and download functionality uses the official Maven repository.

com.facebook.presto.orc.writer.ColumnWriterUtils Maven / Gradle / Ivy

The newest version!
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.orc.writer;

import com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint;
import com.facebook.presto.orc.checkpoint.StreamCheckpoint;
import com.facebook.presto.orc.metadata.RowGroupIndex;
import com.facebook.presto.orc.metadata.statistics.ColumnStatistics;
import com.facebook.presto.orc.stream.PresentOutputStream;
import com.facebook.presto.orc.stream.ValueOutputStream;
import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Ints;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

public class ColumnWriterUtils
{
    private ColumnWriterUtils()
    {
        // utils
    }

    /**
     * Build RowGroupIndex using column statistics and checkpoints.
     */
    @SafeVarargs
    public static List buildRowGroupIndexes(
            boolean compressed,
            List rowGroupColumnStatistics,
            Optional> prependCheckpoints,
            PresentOutputStream presentStream,
            ValueOutputStream... dataStreams)
    {
        ImmutableList.Builder rowGroupIndexes = ImmutableList.builder();
        Optional> presentCheckpoints = presentStream.getCheckpoints();

        List> dataCheckpoints = Arrays.stream(dataStreams)
                .map(ValueOutputStream::getCheckpoints)
                .collect(Collectors.toList());

        List positions = new ArrayList<>();
        for (int i = 0; i < rowGroupColumnStatistics.size(); i++) {
            int groupId = i;
            Optional prependCheckpoint = prependCheckpoints.map(checkpoints -> checkpoints.get(groupId));
            Optional presentCheckpoint = presentCheckpoints.map(checkpoints -> checkpoints.get(groupId));

            // prepend and present checkpoints always come first
            prependCheckpoint.ifPresent(checkpoint -> positions.addAll(checkpoint.toPositionList(compressed)));
            presentCheckpoint.ifPresent(checkpoint -> positions.addAll(checkpoint.toPositionList(compressed)));

            // add data checkpoints
            for (List dataCheckpoint : dataCheckpoints) {
                StreamCheckpoint streamCheckpoint = dataCheckpoint.get(groupId);
                positions.addAll(streamCheckpoint.toPositionList(compressed));
            }

            ColumnStatistics columnStatistics = rowGroupColumnStatistics.get(groupId);
            rowGroupIndexes.add(new RowGroupIndex(Ints.toArray(positions), columnStatistics));
            positions.clear();
        }

        return rowGroupIndexes.build();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy