All downloads are free. Search and download functionality uses the official Maven repository.

com.facebook.presto.orc.reader.SliceDictionarySelectiveReader Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.orc.reader;

import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockLease;
import com.facebook.presto.common.block.ClosingBlockLease;
import com.facebook.presto.common.block.DictionaryBlock;
import com.facebook.presto.common.block.RunLengthEncodedBlock;
import com.facebook.presto.common.block.VariableWidthBlock;
import com.facebook.presto.common.predicate.TupleDomainFilter;
import com.facebook.presto.orc.OrcCorruptionException;
import com.facebook.presto.orc.OrcLocalMemoryContext;
import com.facebook.presto.orc.StreamDescriptor;
import com.facebook.presto.orc.Stripe;
import com.facebook.presto.orc.metadata.OrcType;
import com.facebook.presto.orc.stream.BooleanInputStream;
import com.facebook.presto.orc.stream.ByteArrayInputStream;
import com.facebook.presto.orc.stream.InputStreamSource;
import com.facebook.presto.orc.stream.InputStreamSources;
import com.facebook.presto.orc.stream.LongInputStream;
import com.facebook.presto.orc.stream.RowGroupDictionaryLengthInputStream;
import com.google.common.annotations.VisibleForTesting;
import io.airlift.slice.Slice;
import org.openjdk.jol.info.ClassLayout;

import javax.annotation.Nullable;

import java.io.IOException;
import java.util.Arrays;
import java.util.Optional;

import static com.facebook.presto.common.array.Arrays.ExpansionFactor.MEDIUM;
import static com.facebook.presto.common.array.Arrays.ExpansionOption.PRESERVE;
import static com.facebook.presto.common.array.Arrays.ensureCapacity;
import static com.facebook.presto.orc.metadata.OrcType.OrcTypeKind.CHAR;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.DATA;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.DICTIONARY_DATA;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.IN_DICTIONARY;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.LENGTH;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.PRESENT;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.ROW_GROUP_DICTIONARY;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.ROW_GROUP_DICTIONARY_LENGTH;
import static com.facebook.presto.orc.reader.SelectiveStreamReaders.initializeOutputPositions;
import static com.facebook.presto.orc.reader.SliceSelectiveStreamReader.computeTruncatedLength;
import static com.facebook.presto.orc.stream.MissingInputStreamSource.getBooleanMissingStreamSource;
import static com.facebook.presto.orc.stream.MissingInputStreamSource.getByteArrayMissingStreamSource;
import static com.facebook.presto.orc.stream.MissingInputStreamSource.getLongMissingStreamSource;
import static com.facebook.presto.orc.stream.MissingInputStreamSource.getRowGroupDictionaryLengthMissingStreamSource;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Verify.verify;
import static io.airlift.slice.SizeOf.sizeOf;
import static io.airlift.slice.Slices.wrappedBuffer;
import static java.lang.Math.toIntExact;
import static java.util.Arrays.fill;
import static java.util.Objects.requireNonNull;

public class SliceDictionarySelectiveReader
        implements SelectiveStreamReader
{
    // Shallow size of one reader instance, as reported by JOL's ClassLayout; used by getRetainedSizeInBytes()
    private static final int INSTANCE_SIZE = ClassLayout.parseClass(SliceDictionarySelectiveReader.class).instanceSize();

    // Filter evaluation states, stored per dictionary entry in evaluationStatus. Byte constants are used instead of
    // an enum because they are more memory efficient.
    private static final byte FILTER_NOT_EVALUATED = 0;
    private static final byte FILTER_PASSED = 1;
    private static final byte FILTER_FAILED = 2;

    private static final byte[] EMPTY_DICTIONARY_DATA = new byte[0];
    // Add one extra entry for null after the stripe/rowGroup dictionary
    private static final int[] EMPTY_DICTIONARY_OFFSETS = new int[2];

    // Each rowgroup has roughly 10K rows, and each batch reads 1K rows, so there are about 10 batches in a rowgroup.
    private static final int BATCHES_PER_ROWGROUP = 10;
    // MATERIALIZATION_RATIO should be greater than or equal to 1.0f to compensate for the extra CPU needed to materialize blocks.
    private static final float MATERIALIZATION_RATIO = 2.0f;

    private final SelectiveReaderContext context;
    // Declared length of the ORC type; -1 when the type declares no length, 0 when the ORC type is unavailable
    private final int maxCodePointCount;
    private final boolean isCharType;

    // Bytes of the stripe dictionary entries, followed by the bytes of the current rowgroup dictionary entries (if any)
    private byte[] dictionaryData = EMPTY_DICTIONARY_DATA;
    // Entry i occupies dictionaryData[dictionaryOffsetVector[i] .. dictionaryOffsetVector[i + 1])
    private int[] dictionaryOffsetVector = EMPTY_DICTIONARY_OFFSETS;
    private int[] stripeDictionaryLength = new int[0];
    private int[] rowGroupDictionaryLength = new int[0];
    // Cached filter outcome per dictionary entry (FILTER_* constants); null when results are not cached
    private byte[] evaluationStatus;
    // Scratch buffer used by evaluateFilter() to space-pad CHAR values; null when maxCodePointCount is negative
    private byte[] valueWithPadding;

    // Rowgroup-relative position up to which the streams have been consumed
    private int readOffset;

    // Initially a single null entry; replaced by wrapDictionaryIfNecessary()
    private VariableWidthBlock dictionary = new VariableWidthBlock(1, wrappedBuffer(EMPTY_DICTIONARY_DATA), EMPTY_DICTIONARY_OFFSETS, Optional.of(new boolean[] {true}));

    private InputStreamSource presentStreamSource = getBooleanMissingStreamSource();
    @Nullable
    private BooleanInputStream presentStream;
    @Nullable
    private TupleDomainFilter filter;

    private BooleanInputStream inDictionaryStream;

    private InputStreamSource stripeDictionaryDataStreamSource = getByteArrayMissingStreamSource();
    private InputStreamSource stripeDictionaryLengthStreamSource = getLongMissingStreamSource();
    // Reset to false by startStripe(); set to true once openRowGroup() has loaded the stripe dictionary
    private boolean stripeDictionaryOpen;
    // The dictionaries are wrapped in getBlock(). This flag is set to false when a new dictionary is opened (be it a stripe dictionary or a rowgroup dictionary). When there is
    // only a stripe dictionary and no rowgroup dictionaries, it is set to false only when the stripe dictionary is opened, not for every rowgroup. It is set to true once the
    // dictionary has been wrapped in wrapDictionaryIfNecessary().
    private boolean dictionaryWrapped;

    private int stripeDictionarySize;
    // Stripe entries + rowgroup entries + 1; the last id (currentDictionarySize - 1) denotes null
    private int currentDictionarySize;

    private InputStreamSource rowGroupDictionaryDataStreamSource = getByteArrayMissingStreamSource();
    private InputStreamSource inDictionaryStreamSource = getBooleanMissingStreamSource();
    private InputStreamSource rowGroupDictionaryLengthStreamSource = getRowGroupDictionaryLengthMissingStreamSource();

    private InputStreamSource dataStreamSource = getLongMissingStreamSource();
    private LongInputStream dataStream;

    private boolean rowGroupOpen;
    private OrcLocalMemoryContext systemMemoryContext;

    // Dictionary ids of the values selected by the last read()
    private int[] values;
    // Set by readAllNulls(); makes getBlock()/getBlockView() return a run-length-encoded null block
    private boolean allNulls;
    private int[] outputPositions;
    private int outputPositionCount;
    // True while a lease handed out by getBlockView() has not been closed
    private boolean valuesInUse;

    /**
     * Creates a selective reader for a dictionary-encoded string column.
     *
     * @param context reader context that supplies the stream descriptor, filter, output type and memory context
     * @throws NullPointerException if {@code context} is null
     */
    public SliceDictionarySelectiveReader(SelectiveReaderContext context)
    {
        this.context = requireNonNull(context, "context is null");
        this.systemMemoryContext = context.getSystemMemoryContext().newOrcLocalMemoryContext(this.getClass().getSimpleName());
        OrcType orcType = context.getStreamDescriptor().getOrcType();
        // 0 when the ORC type is unavailable, -1 when the type declares no length
        this.maxCodePointCount = orcType == null ? 0 : orcType.getLength().orElse(-1);
        // The padding buffer is only allocated when a length is known
        this.valueWithPadding = maxCodePointCount < 0 ? null : new byte[maxCodePointCount];
        // A missing ORC type is treated as "not CHAR", consistent with the null check above, instead of throwing NPE
        this.isCharType = orcType != null && orcType.getOrcTypeKind() == CHAR;
    }

    /**
     * Reads the values at the given rowgroup-relative positions and records which of them are selected.
     *
     * @param offset rowgroup-relative row at which {@code positions} are anchored
     * @param positions ascending positions, relative to {@code offset}, to read
     * @param positionCount number of valid entries in {@code positions}
     * @return the number of positions selected by this read
     * @throws IOException if an underlying stream cannot be read
     * @throws IllegalStateException if a lease returned by {@code getBlockView} is still open
     */
    @Override
    public int read(int offset, int[] positions, int positionCount)
            throws IOException
    {
        checkState(!valuesInUse, "BlockLease hasn't been closed yet");

        // Streams are opened lazily on the first read of a rowgroup
        if (!rowGroupOpen) {
            openRowGroup();
        }

        allNulls = false;

        if (context.isOutputRequired()) {
            values = ensureCapacity(values, positionCount);
        }
        outputPositions = initializeOutputPositions(outputPositions, positions, positionCount);

        // Report memory after the buffers above have been (re)allocated
        systemMemoryContext.setBytes(getRetainedSizeInBytes());

        // Catch up with rows the caller did not ask this reader to look at
        if (offset > readOffset) {
            skip(offset - readOffset);
        }

        outputPositionCount = 0;

        final int consumed;
        if (dataStream != null || presentStream == null) {
            consumed = (filter == null)
                    ? readNoFilter(positions, positionCount)
                    : readWithFilter(positions, positionCount);
        }
        else {
            // A present stream without a data stream: every value in the rowgroup is null
            consumed = readAllNulls(positions, positionCount);
        }

        readOffset = offset + consumed;
        return outputPositionCount;
    }

    /**
     * Reads the dictionary id of every requested position without applying a filter; all positions are selected.
     * Null values are recorded with the id {@code currentDictionarySize - 1}.
     *
     * @return the number of stream positions consumed, i.e. the last requested position plus one
     */
    private int readNoFilter(int[] positions, int positionCount)
            throws IOException
    {
        int nullId = currentDictionarySize - 1;
        int cursor = 0;
        for (int i = 0; i < positionCount; i++) {
            int target = positions[i];
            if (target > cursor) {
                skip(target - cursor);
                cursor = target;
            }

            boolean isNull = presentStream != null && !presentStream.nextBit();
            if (isNull) {
                values[i] = nullId;
            }
            else {
                // A cleared "in dictionary" bit means the id refers to the rowgroup dictionary, which is stored
                // after the stripe dictionary entries
                boolean fromRowGroupDictionary = inDictionaryStream != null && !inDictionaryStream.nextBit();
                int id = toIntExact(dataStream.next());
                if (fromRowGroupDictionary) {
                    id = stripeDictionarySize + id;
                }
                values[i] = id;
            }
            cursor++;
        }
        outputPositionCount = positionCount;
        return cursor;
    }

    /**
     * Reads the requested positions, evaluates {@code filter} against each value and records the positions
     * (and, when output is required, the dictionary ids) of the values that pass.
     *
     * @param positions ascending positions to read
     * @param positionCount number of valid entries in {@code positions}
     * @return the number of stream positions consumed
     * @throws IOException if an underlying stream cannot be read
     */
    private int readWithFilter(int[] positions, int positionCount)
            throws IOException
    {
        int streamPosition = 0;
        for (int i = 0; i < positionCount; i++) {
            int position = positions[i];
            if (position > streamPosition) {
                skip(position - streamPosition);
                streamPosition = position;
            }

            if (presentStream != null && !presentStream.nextBit()) {
                // Null value: a non-deterministic filter is asked for every row, otherwise the context's answer is used
                if ((context.isNonDeterministicFilter() && filter.testNull()) || context.isNullsAllowed()) {
                    if (context.isOutputRequired()) {
                        values[outputPositionCount] = currentDictionarySize - 1;
                    }
                    outputPositions[outputPositionCount] = position;
                    outputPositionCount++;
                }
            }
            else {
                // A cleared "in dictionary" bit means the id refers to the rowgroup dictionary, which is stored
                // after the stripe dictionary entries
                boolean inRowDictionary = inDictionaryStream != null && !inDictionaryStream.nextBit();
                int rawIndex = toIntExact(dataStream.next());
                int index = inRowDictionary ? stripeDictionarySize + rawIndex : rawIndex;
                int entryLength = dictionaryOffsetVector[index + 1] - dictionaryOffsetVector[index];

                // CHAR values must be tested at their padded length; evaluateFilter() pads with spaces whenever the
                // requested length differs from the stored one. Passing the stored length here (as before) made
                // that padding unreachable. The length is never smaller than the stored byte length, so the
                // maxCodePointCount-sized padding buffer cannot overflow.
                int length = isCharType ? Math.max(maxCodePointCount, entryLength) : entryLength;

                if (evaluationStatus == null) {
                    // No per-entry cache: evaluate the filter for every row
                    evaluateFilter(position, index, length);
                }
                else {
                    switch (evaluationStatus[index]) {
                        case FILTER_FAILED: {
                            break;
                        }
                        case FILTER_PASSED: {
                            if (context.isOutputRequired()) {
                                values[outputPositionCount] = index;
                            }
                            outputPositions[outputPositionCount] = position;
                            outputPositionCount++;
                            break;
                        }
                        case FILTER_NOT_EVALUATED: {
                            // First time this entry is seen: evaluate and remember the outcome
                            evaluationStatus[index] = evaluateFilter(position, index, length);
                            break;
                        }
                        default: {
                            throw new IllegalStateException("invalid evaluation state");
                        }
                    }
                }
            }
            streamPosition++;

            if (filter != null) {
                // The filter may retroactively reject already accepted positions ...
                outputPositionCount -= filter.getPrecedingPositionsToFail();
                // ... and reject upcoming requested positions, whose stream entries are then skipped unread
                int succeedingPositionsToFail = filter.getSucceedingPositionsToFail();
                if (succeedingPositionsToFail > 0) {
                    int positionsToSkip = 0;
                    for (int j = 0; j < succeedingPositionsToFail; j++) {
                        i++;
                        int nextPosition = positions[i];
                        positionsToSkip += 1 + nextPosition - streamPosition;
                        streamPosition = nextPosition + 1;
                    }
                    skip(positionsToSkip);
                }
            }
        }
        return streamPosition;
    }

    /**
     * Tests one dictionary entry against {@code filter} and, if it passes, records {@code position} (and the
     * dictionary id when output is required) as selected.
     *
     * @param position rowgroup-relative position of the value being tested
     * @param index dictionary id of the value
     * @param length length at which the value is tested; for CHAR columns a length different from the stored
     *         one causes the value to be space-padded to {@code length} first
     * @return {@code FILTER_PASSED} or {@code FILTER_FAILED}
     */
    private byte evaluateFilter(int position, int index, int length)
    {
        if (!filter.testLength(length)) {
            return FILTER_FAILED;
        }

        int start = dictionaryOffsetVector[index];
        int storedLength = dictionaryOffsetVector[index + 1] - start;

        boolean accepted;
        if (isCharType && length != storedLength) {
            // Copy the stored bytes into the scratch buffer and fill the remainder with spaces
            System.arraycopy(dictionaryData, start, valueWithPadding, 0, storedLength);
            Arrays.fill(valueWithPadding, storedLength, length, (byte) ' ');
            accepted = filter.testBytes(valueWithPadding, 0, length);
        }
        else {
            accepted = filter.testBytes(dictionaryData, start, length);
        }

        if (!accepted) {
            return FILTER_FAILED;
        }

        if (context.isOutputRequired()) {
            values[outputPositionCount] = index;
        }
        outputPositions[outputPositionCount] = position;
        outputPositionCount++;
        return FILTER_PASSED;
    }

    /**
     * Handles a read when there is a present stream but no data stream. Determines how many of the requested
     * positions are selected and flags the batch as all-null.
     *
     * @return the last requested position plus one
     */
    private int readAllNulls(int[] positions, int positionCount)
            throws IOException
    {
        int lastPosition = positions[positionCount - 1];
        presentStream.skip(lastPosition);

        if (!context.isNonDeterministicFilter()) {
            // Deterministic case: either every null is accepted or none is
            outputPositionCount = context.isNullsAllowed() ? positionCount : 0;
        }
        else {
            // Non-deterministic filter: ask it once per requested position
            outputPositionCount = 0;
            int i = 0;
            while (i < positionCount) {
                if (filter.testNull()) {
                    outputPositionCount++;
                }
                else {
                    outputPositionCount -= filter.getPrecedingPositionsToFail();
                    i += filter.getSucceedingPositionsToFail();
                }
                i++;
            }
        }

        allNulls = true;
        return lastPosition + 1;
    }

    /**
     * Advances all open streams past {@code items} rows. When a present stream exists, the in-dictionary and
     * data streams advance only by the number of set bits in the skipped range.
     */
    private void skip(int items)
            throws IOException
    {
        if (presentStream == null) {
            // No present stream: every row has an entry in the value streams
            if (inDictionaryStream != null) {
                inDictionaryStream.skip(items);
            }
            dataStream.skip(items);
            return;
        }

        int valuesToSkip = presentStream.countBitsSet(items);
        if (inDictionaryStream != null) {
            inDictionaryStream.skip(valuesToSkip);
        }
        if (dataStream != null) {
            dataStream.skip(valuesToSkip);
        }
    }

    /**
     * Returns the array holding the positions selected by the most recent read. The internal array is returned
     * directly, without copying.
     */
    @Override
    public int[] getReadPositions()
    {
        return outputPositions;
    }

    /**
     * Builds an output block for the given subset of the positions selected by the last read.
     * Depending on the data this is a run-length-encoded null block, a materialized variable-width block or a
     * dictionary block backed by a copy of the ids.
     *
     * @param positions positions to include; compacted against the positions selected by the last read
     * @param positionCount number of valid entries in {@code positions}
     * @return the block holding the requested values
     * @throws IllegalArgumentException if the last read selected no positions
     * @throws IllegalStateException if output is not required, too many positions are requested, or a lease
     *         returned by {@code getBlockView} is still open
     */
    @Override
    public Block getBlock(int[] positions, int positionCount)
    {
        checkArgument(outputPositionCount > 0, "outputPositionCount must be greater than zero");
        checkState(context.isOutputRequired(), "This stream reader doesn't produce output");
        checkState(positionCount <= outputPositionCount, "Not enough values");
        checkState(!valuesInUse, "BlockLease hasn't been closed yet");

        if (allNulls) {
            return new RunLengthEncodedBlock(context.getOutputType().createBlockBuilder(null, 1).appendNull().build(), positionCount);
        }

        // Keep only the ids of the requested positions
        compactValues(positions, positionCount);

        // Measure how large the output would be if materialized, and count nulls along the way
        int nullId = currentDictionarySize - 1;
        long materializedBytes = 0;
        int nulls = 0;
        for (int i = 0; i < positionCount; i++) {
            int id = values[i];
            materializedBytes += dictionaryOffsetVector[id + 1] - dictionaryOffsetVector[id];
            if (id == nullId) {
                nulls++;
            }
        }

        // Nothing but nulls was selected: a run-length-encoded null block is enough
        if (nulls == positionCount) {
            return new RunLengthEncodedBlock(context.getOutputType().createBlockBuilder(null, 1).appendNull().build(), positionCount);
        }

        // Materialize when the expected output is small compared to the dictionary
        int dictionaryBytes = dictionaryOffsetVector[nullId];
        if (materializedBytes * BATCHES_PER_ROWGROUP < dictionaryBytes / MATERIALIZATION_RATIO) {
            return getMaterializedBlock(positionCount, materializedBytes, nulls);
        }

        wrapDictionaryIfNecessary();
        return new DictionaryBlock(positionCount, dictionary, Arrays.copyOf(values, positionCount));
    }

    /**
     * Returns a leased view over the values of the last read. The view shares the internal ids array, so no
     * further read or block request is accepted until the lease is closed.
     *
     * @param positions positions to include
     * @param positionCount number of valid entries in {@code positions}
     * @return a lease whose closing releases the internal ids array
     * @throws IllegalArgumentException if the last read selected no positions
     * @throws IllegalStateException if output is not required, too many positions are requested, or a
     *         previous lease is still open
     */
    @Override
    public BlockLease getBlockView(int[] positions, int positionCount)
    {
        checkArgument(outputPositionCount > 0, "outputPositionCount must be greater than zero");
        checkState(context.isOutputRequired(), "This stream reader doesn't produce output");
        checkState(positionCount <= outputPositionCount, "Not enough values");
        checkState(!valuesInUse, "BlockLease hasn't been closed yet");

        Block view;
        if (allNulls) {
            view = new RunLengthEncodedBlock(context.getOutputType().createBlockBuilder(null, 1).appendNull().build(), positionCount);
        }
        else {
            // Compaction is only needed when a strict subset of the selected positions is requested
            if (outputPositionCount > positionCount) {
                compactValues(positions, positionCount);
            }
            wrapDictionaryIfNecessary();
            view = new DictionaryBlock(positionCount, dictionary, values);
        }
        return newLease(view);
    }

    /**
     * Rebuilds the {@code dictionary} block from copies of the current dictionary bytes and offsets, unless that
     * has already been done for the current dictionary. The last entry is marked as null.
     */
    private void wrapDictionaryIfNecessary()
    {
        if (!dictionaryWrapped) {
            int entryCount = currentDictionarySize;

            boolean[] nullFlags = new boolean[entryCount];
            nullFlags[entryCount - 1] = true;

            // Copies are trimmed to the part of the buffers actually in use
            byte[] dataCopy = Arrays.copyOf(dictionaryData, dictionaryOffsetVector[entryCount]);
            int[] offsetsCopy = Arrays.copyOf(dictionaryOffsetVector, entryCount + 1);

            dictionary = new VariableWidthBlock(entryCount, wrappedBuffer(dataCopy), offsetsCopy, Optional.of(nullFlags));
            dictionaryWrapped = true;
        }
    }

    /**
     * Compacts {@code values} and {@code outputPositions} in place so that they hold only the entries whose
     * position appears in {@code positions}, then sets {@code outputPositionCount} to {@code positionCount}.
     */
    private void compactValues(int[] positions, int positionCount)
    {
        int writeIndex = 0;
        int wanted = positions[writeIndex];
        for (int readIndex = 0; readIndex < outputPositionCount; readIndex++) {
            if (outputPositions[readIndex] >= wanted) {
                assert outputPositions[readIndex] == wanted;

                values[writeIndex] = values[readIndex];
                outputPositions[writeIndex] = wanted;
                writeIndex++;

                // Stop as soon as every requested position has been placed
                if (writeIndex >= positionCount) {
                    break;
                }
                wanted = positions[writeIndex];
            }
        }

        outputPositionCount = positionCount;
    }

    /**
     * No-op in this reader: the body is empty and nothing is thrown.
     */
    @Override
    public void throwAnyError(int[] positions, int positionCount)
    {
    }

    /**
     * Opens the streams of the current rowgroup and (re)builds the dictionary buffers.
     * The stripe dictionary is read only once per stripe; a rowgroup dictionary, when present, is read on every
     * call and appended after the stripe dictionary entries. One extra zero-length entry for null is placed
     * after the last dictionary entry.
     *
     * @throws IOException if a stream cannot be opened or read
     * @throws OrcCorruptionException if the stripe dictionary is non-empty but its length stream is missing
     */
    private void openRowGroup()
            throws IOException
    {
        presentStream = presentStreamSource.openStream();
        filter = context.getFilter(presentStream);

        // read the dictionary
        if (!stripeDictionaryOpen) {
            if (stripeDictionarySize > 0) {
                // resize the dictionary lengths array if necessary
                if (stripeDictionaryLength.length < stripeDictionarySize) {
                    stripeDictionaryLength = new int[stripeDictionarySize];
                }

                // read the lengths
                LongInputStream lengthStream = stripeDictionaryLengthStreamSource.openStream();
                if (lengthStream == null) {
                    throw new OrcCorruptionException(context.getStreamDescriptor().getOrcDataSourceId(), "Dictionary is not empty but dictionary length stream is not present");
                }
                lengthStream.nextIntVector(stripeDictionarySize, stripeDictionaryLength, 0);

                // total number of bytes needed for all stripe dictionary entries
                long dataLength = 0;
                for (int i = 0; i < stripeDictionarySize; i++) {
                    dataLength += stripeDictionaryLength[i];
                }

                dictionaryData = ensureCapacity(dictionaryData, toIntExact(dataLength));
                // one offset per entry, plus the terminating offset, plus one more for the null entry
                dictionaryOffsetVector = ensureCapacity(dictionaryOffsetVector, stripeDictionarySize + 2);

                // read dictionary values
                ByteArrayInputStream dictionaryDataStream = stripeDictionaryDataStreamSource.openStream();
                readDictionary(dictionaryDataStream, stripeDictionarySize, stripeDictionaryLength, 0, dictionaryData, dictionaryOffsetVector, maxCodePointCount, isCharType);
            }
            else {
                dictionaryData = EMPTY_DICTIONARY_DATA;
                dictionaryOffsetVector = EMPTY_DICTIONARY_OFFSETS;
            }

            // If there is no rowgroup dictionary, we only need to wrap the stripe dictionary once per stripe because wrapping dictionary is very expensive.
            dictionaryWrapped = false;
        }

        // read row group dictionary
        RowGroupDictionaryLengthInputStream dictionaryLengthStream = rowGroupDictionaryLengthStreamSource.openStream();
        if (dictionaryLengthStream != null) {
            int rowGroupDictionarySize = dictionaryLengthStream.getEntryCount();

            rowGroupDictionaryLength = ensureCapacity(rowGroupDictionaryLength, rowGroupDictionarySize);

            // read the lengths
            dictionaryLengthStream.nextIntVector(rowGroupDictionarySize, rowGroupDictionaryLength, 0);
            long dataLength = 0;
            for (int i = 0; i < rowGroupDictionarySize; i++) {
                dataLength += rowGroupDictionaryLength[i];
            }

            // Rowgroup entries are appended after the stripe dictionary bytes.
            // NOTE(review): PRESERVE presumably keeps the already loaded stripe dictionary contents when the
            // arrays are grown - confirm against Arrays.ensureCapacity.
            dictionaryData = ensureCapacity(
                    dictionaryData,
                    dictionaryOffsetVector[stripeDictionarySize] + toIntExact(dataLength),
                    MEDIUM,
                    PRESERVE);

            dictionaryOffsetVector = ensureCapacity(dictionaryOffsetVector,
                    stripeDictionarySize + rowGroupDictionarySize + 2,
                    MEDIUM,
                    PRESERVE);

            // the dictionary content changed, so the wrapped block must be rebuilt
            dictionaryWrapped = false;

            // read dictionary values
            ByteArrayInputStream dictionaryDataStream = rowGroupDictionaryDataStreamSource.openStream();
            readDictionary(dictionaryDataStream, rowGroupDictionarySize, rowGroupDictionaryLength, stripeDictionarySize, dictionaryData, dictionaryOffsetVector, maxCodePointCount, isCharType);
            currentDictionarySize = stripeDictionarySize + rowGroupDictionarySize + 1;

            initiateEvaluationStatus(stripeDictionarySize + rowGroupDictionarySize + 1);
        }
        else {
            // there is no row group dictionary so use the stripe dictionary
            currentDictionarySize = stripeDictionarySize + 1;
            initiateEvaluationStatus(stripeDictionarySize + 1);
        }

        // the null entry (last id) gets zero length: its end offset equals its start offset
        dictionaryOffsetVector[currentDictionarySize] = dictionaryOffsetVector[currentDictionarySize - 1];
        stripeDictionaryOpen = true;
        inDictionaryStream = inDictionaryStreamSource.openStream();
        dataStream = dataStreamSource.openStream();

        rowGroupOpen = true;
    }

    // Reads dictionarySize entries from the stream into data and records the end offset of each (truncated) entry
    // in offsetVector, starting at slot offsetVectorOffset
    private static void readDictionary(
            @Nullable ByteArrayInputStream dictionaryDataStream,
            int dictionarySize,
            int[] dictionaryLengthVector,
            int offsetVectorOffset,
            byte[] data,
            int[] offsetVector,
            int maxCodePointCount,
            boolean isCharType)
            throws IOException
    {
        Slice dataSlice = wrappedBuffer(data);

        // When writing from the first slot the base offset is reset;
        // otherwise the offset left by the previous dictionary is the starting point
        if (offsetVectorOffset == 0) {
            offsetVector[0] = 0;
        }

        for (int entry = 0; entry < dictionarySize; entry++) {
            int slot = offsetVectorOffset + entry;
            int start = offsetVector[slot];
            int rawLength = dictionaryLengthVector[entry];

            int keptLength = 0;
            if (rawLength > 0) {
                // read the full entry first, then work out how much of it is kept
                dictionaryDataStream.next(data, start, start + rawLength);
                keptLength = computeTruncatedLength(dataSlice, start, rawLength, maxCodePointCount, isCharType);
                verify(keptLength >= 0);
            }

            // the next entry starts right after the kept part of this one
            offsetVector[slot + 1] = start + keptLength;
        }
    }

    /**
     * Prepares the reader for a new stripe: captures the stripe dictionary stream sources and dictionary size,
     * and resets all rowgroup-level sources, streams and state.
     *
     * @param stripe the stripe about to be read
     */
    @Override
    public void startStripe(Stripe stripe)
    {
        StreamDescriptor descriptor = context.getStreamDescriptor();
        InputStreamSources dictionarySources = stripe.getDictionaryStreamSources();

        // stripe-level dictionary
        stripeDictionaryDataStreamSource = dictionarySources.getInputStreamSource(descriptor, DICTIONARY_DATA, ByteArrayInputStream.class);
        stripeDictionaryLengthStreamSource = dictionarySources.getInputStreamSource(descriptor, LENGTH, LongInputStream.class);
        stripeDictionarySize = stripe.getColumnEncodings()
                .get(descriptor.getStreamId())
                .getColumnEncoding(descriptor.getSequence())
                .getDictionarySize();
        stripeDictionaryOpen = false;

        // rowgroup-level sources fall back to "missing" until startRowGroup() supplies real ones
        presentStreamSource = getBooleanMissingStreamSource();
        dataStreamSource = getLongMissingStreamSource();
        inDictionaryStreamSource = getBooleanMissingStreamSource();
        rowGroupDictionaryLengthStreamSource = getRowGroupDictionaryLengthMissingStreamSource();
        rowGroupDictionaryDataStreamSource = getByteArrayMissingStreamSource();

        // forget everything that belonged to the previous rowgroup
        rowGroupOpen = false;
        readOffset = 0;
        presentStream = null;
        filter = null;
        inDictionaryStream = null;
        dataStream = null;
    }

    /**
     * Prepares the reader for a new rowgroup: captures the rowgroup stream sources and resets the open streams
     * and the read offset. The streams themselves are opened on the next read.
     *
     * @param dataStreamSources stream sources of the rowgroup about to be read
     */
    @Override
    public void startRowGroup(InputStreamSources dataStreamSources)
    {
        StreamDescriptor descriptor = context.getStreamDescriptor();

        presentStreamSource = dataStreamSources.getInputStreamSource(descriptor, PRESENT, BooleanInputStream.class);
        dataStreamSource = dataStreamSources.getInputStreamSource(descriptor, DATA, LongInputStream.class);

        // the "in dictionary" stream signals if the value is in the stripe or row group dictionary
        inDictionaryStreamSource = dataStreamSources.getInputStreamSource(descriptor, IN_DICTIONARY, BooleanInputStream.class);
        rowGroupDictionaryLengthStreamSource = dataStreamSources.getInputStreamSource(descriptor, ROW_GROUP_DICTIONARY_LENGTH, RowGroupDictionaryLengthInputStream.class);
        rowGroupDictionaryDataStreamSource = dataStreamSources.getInputStreamSource(descriptor, ROW_GROUP_DICTIONARY, ByteArrayInputStream.class);

        // forget everything that belonged to the previous rowgroup
        rowGroupOpen = false;
        readOffset = 0;
        presentStream = null;
        filter = null;
        inDictionaryStream = null;
        dataStream = null;
    }

    /**
     * Drops the references to the dictionary and output buffers and closes the memory context.
     */
    @Override
    public void close()
    {
        // output buffers
        values = null;
        outputPositions = null;

        // dictionary state
        dictionary = null;
        dictionaryData = null;
        dictionaryOffsetVector = null;
        stripeDictionaryLength = null;
        rowGroupDictionaryLength = null;

        systemMemoryContext.close();
    }

    /**
     * Returns the instance size plus the sizes of the ids array, the dictionary buffers, the filter result
     * cache, the padding buffer and the wrapped dictionary block.
     */
    @Override
    public long getRetainedSizeInBytes()
    {
        long retained = INSTANCE_SIZE;
        retained += sizeOf(values);
        retained += sizeOf(dictionaryData);
        retained += sizeOf(dictionaryOffsetVector);
        retained += sizeOf(stripeDictionaryLength);
        retained += sizeOf(rowGroupDictionaryLength);
        retained += sizeOf(evaluationStatus);
        retained += sizeOf(valueWithPadding);
        retained += dictionary.getRetainedSizeInBytes();
        return retained;
    }

    /**
     * Test hook: clears the data stream reference. With a present stream still open, the next read then takes
     * the all-nulls path.
     */
    @VisibleForTesting
    public void resetDataStream()
    {
        dataStream = null;
    }

    /**
     * Prepares the per-dictionary-entry cache of filter outcomes for the current dictionary.
     * <p>
     * Caching one outcome per entry is only valid when the filter gives the same answer every time it sees the
     * same value, so the cache is created for deterministic filters only (and not in low-memory mode). For a
     * non-deterministic filter, or when there is no filter, the cache is left {@code null} and the filter is
     * evaluated for every row.
     *
     * @param positionCount dictionary size including the trailing null entry; must be positive
     */
    private void initiateEvaluationStatus(int positionCount)
    {
        verify(positionCount > 0);
        boolean cacheable = filter != null && !context.isNonDeterministicFilter() && !context.isLowMemory();
        if (cacheable) {
            // the null entry is handled separately, so it needs no slot
            evaluationStatus = ensureCapacity(evaluationStatus, positionCount - 1);
            fill(evaluationStatus, 0, evaluationStatus.length, FILTER_NOT_EVALUATED);
        }
        else {
            evaluationStatus = null;
        }
    }

    /**
     * Wraps {@code block} in a lease and marks the ids array as in use until the lease is closed.
     */
    private BlockLease newLease(Block block)
    {
        valuesInUse = true;
        // closing the lease clears the flag again
        return ClosingBlockLease.newLease(block, () -> valuesInUse = false);
    }

    /**
     * Copies the selected dictionary entries into a new variable-width block instead of referencing the
     * dictionary.
     *
     * @param positionCount number of values to copy from {@code values}
     * @param blockSizeInBytes total byte length of the selected entries
     * @param nullCount number of selected values that are null; a null vector is only built when positive
     * @return a block holding copies of the selected values
     */
    private Block getMaterializedBlock(int positionCount, long blockSizeInBytes, int nullCount)
    {
        byte[] bytes = new byte[toIntExact(blockSizeInBytes)];
        int[] offsets = new int[positionCount + 1];
        boolean[] nullFlags = nullCount > 0 ? new boolean[positionCount] : null;
        int nullId = currentDictionarySize - 1;

        int written = 0;
        for (int i = 0; i < positionCount; i++) {
            int id = values[i];
            int start = dictionaryOffsetVector[id];
            int entryLength = dictionaryOffsetVector[id + 1] - start;
            System.arraycopy(dictionaryData, start, bytes, written, entryLength);

            written += entryLength;
            offsets[i + 1] = written;

            if (nullFlags != null && id == nullId) {
                nullFlags[i] = true;
            }
        }

        // an absent null vector is represented by an empty Optional
        return new VariableWidthBlock(positionCount, wrappedBuffer(bytes), offsets, Optional.ofNullable(nullFlags));
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy