All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.facebook.presto.orc.reader.SliceDirectSelectiveStreamReader Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.orc.reader;

import com.facebook.presto.common.GenericInternalException;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockLease;
import com.facebook.presto.common.block.ClosingBlockLease;
import com.facebook.presto.common.block.RunLengthEncodedBlock;
import com.facebook.presto.common.block.VariableWidthBlock;
import com.facebook.presto.common.predicate.TupleDomainFilter;
import com.facebook.presto.orc.OrcLocalMemoryContext;
import com.facebook.presto.orc.StreamDescriptor;
import com.facebook.presto.orc.Stripe;
import com.facebook.presto.orc.metadata.OrcType;
import com.facebook.presto.orc.stream.BooleanInputStream;
import com.facebook.presto.orc.stream.ByteArrayInputStream;
import com.facebook.presto.orc.stream.InputStreamSource;
import com.facebook.presto.orc.stream.InputStreamSources;
import com.facebook.presto.orc.stream.LongInputStream;
import com.google.common.annotations.VisibleForTesting;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.airlift.units.DataSize;
import org.openjdk.jol.info.ClassLayout;

import javax.annotation.Nullable;

import java.io.IOException;
import java.util.Optional;

import static com.facebook.presto.common.array.Arrays.ExpansionFactor.SMALL;
import static com.facebook.presto.common.array.Arrays.ExpansionOption.INITIALIZE;
import static com.facebook.presto.common.array.Arrays.ensureCapacity;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.DATA;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.LENGTH;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.PRESENT;
import static com.facebook.presto.orc.reader.ReaderUtils.convertLengthVectorToOffsetVector;
import static com.facebook.presto.orc.reader.ReaderUtils.packByteArrayAndOffsets;
import static com.facebook.presto.orc.reader.ReaderUtils.packByteArrayOffsetsAndNulls;
import static com.facebook.presto.orc.reader.SelectiveStreamReaders.initializeOutputPositions;
import static com.facebook.presto.orc.reader.SliceSelectiveStreamReader.computeTruncatedLength;
import static com.facebook.presto.orc.stream.MissingInputStreamSource.getBooleanMissingStreamSource;
import static com.facebook.presto.orc.stream.MissingInputStreamSource.getByteArrayMissingStreamSource;
import static com.facebook.presto.orc.stream.MissingInputStreamSource.getLongMissingStreamSource;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static io.airlift.slice.SizeOf.sizeOf;
import static io.airlift.units.DataSize.Unit.GIGABYTE;
import static java.lang.Math.toIntExact;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

public class SliceDirectSelectiveStreamReader
        implements SelectiveStreamReader
{
    // Instance size of this class as reported by JOL; the fixed term of getRetainedSizeInBytes()
    private static final int INSTANCE_SIZE = ClassLayout.parseClass(SliceDirectSelectiveStreamReader.class).instanceSize();
    // prepareForNextRead() rejects a read whose requested values add up to more than this many bytes
    private static final int ONE_GIGABYTE = toIntExact(new DataSize(1, GIGABYTE).toBytes());

    private final SelectiveReaderContext context;
    // true when the ORC type kind of the column is CHAR
    private final boolean isCharType;
    // declared length of the ORC type, or -1 when the type declares none (useBatchMode() treats negative as unbounded)
    private final int maxCodePointCount;
    // receives getRetainedSizeInBytes() at the start of every read(); closed in close()
    private final OrcLocalMemoryContext systemMemoryContext;

    // row offset reached by the previous read(); read() skips forward when the next offset is larger
    private int readOffset;

    private InputStreamSource presentStreamSource = getBooleanMissingStreamSource();
    @Nullable
    private BooleanInputStream presentStream;
    // obtained from the context in openRowGroup(); null selects the readNoFilter() path
    @Nullable
    private TupleDomainFilter filter;

    private InputStreamSource dataStreamSource = getByteArrayMissingStreamSource();
    private ByteArrayInputStream dataStream;
    private InputStreamSource lengthStreamSource = getLongMissingStreamSource();
    private LongInputStream lengthStream;

    // set by openRowGroup(), cleared by startStripe() and startRowGroup()
    private boolean rowGroupOpen;
    // null flags of the output positions; allocated only when output is required, a present stream exists and nulls are allowed
    private boolean[] nulls;

    // positions that passed the last read() and how many of them there are
    private int[] outputPositions;
    private int outputPositionCount;

    private boolean allNulls;           // true if all requested positions are null
    private boolean[] isNullVector;     // isNull flags for all positions up to the last positions requested in read()
    private int[] lengthVector;         // lengths for all positions up to the last positions requested in read()
    private int lengthIndex;            // index into lengthVector array
    private int[] offsets;              // offsets of requested positions only; specifies position boundaries for the data array
    private byte[] data;                // data for requested positions only
    private Slice dataAsSlice;          // data array wrapped in Slice
    // true while a lease created by newLease() is open; read(), getBlock() and getBlockView() refuse to run meanwhile
    private boolean valuesInUse;

    public SliceDirectSelectiveStreamReader(SelectiveReaderContext context)
    {
        this.context = requireNonNull(context, "context is null");
        this.systemMemoryContext = context.getSystemMemoryContext().newOrcLocalMemoryContext(this.getClass().getSimpleName());
        this.isCharType = context.getStreamDescriptor().getOrcType().getOrcTypeKind() == OrcType.OrcTypeKind.CHAR;
        this.maxCodePointCount = context.getStreamDescriptor().getOrcType().getLength().orElse(-1);
    }

    /**
     * Reads the values at {@code positions} (relative to {@code offset}) and applies the filter, if any.
     *
     * @param offset row offset the positions are relative to; rows between the previous read and this offset are skipped
     * @param positions positions to read
     * @param positionCount number of leading entries of {@code positions} to use
     * @return number of positions that survived; they are available through {@link #getReadPositions()}
     * @throws IOException if one of the underlying streams fails
     * @throws IllegalStateException if a lease from {@link #getBlockView(int[], int)} is still open
     */
    @Override
    public int read(int offset, int[] positions, int positionCount)
            throws IOException
    {
        checkState(!valuesInUse, "BlockLease hasn't been closed yet");

        // streams are opened lazily, on the first read of a row group
        if (!rowGroupOpen) {
            openRowGroup();
        }

        allNulls = false;
        outputPositions = initializeOutputPositions(outputPositions, positions, positionCount);
        systemMemoryContext.setBytes(getRetainedSizeInBytes());

        // catch up with the requested offset
        if (readOffset < offset) {
            skip(offset - readOffset);
        }

        int dataLength = prepareForNextRead(positionCount, positions);

        final int rowsConsumed;
        if (lengthStream == null) {
            rowsConsumed = readAllNulls(positions, positionCount);
        }
        else if (filter != null) {
            rowsConsumed = readWithFilter(positions, positionCount, dataLength);
        }
        else {
            rowsConsumed = readNoFilter(positions, positionCount, dataLength);
        }

        readOffset = offset + rowsConsumed;
        return outputPositionCount;
    }

    /**
     * Reads the requested positions when no filter is installed, so every requested position is output.
     * <p>
     * In batch mode the bytes of all rows up to the last requested position are read with a single
     * call and, when some rows were not requested, packed down to the requested ones. Otherwise rows
     * are visited one by one: gaps are skipped and each requested value is read at its output offset
     * and truncated via {@code computeTruncatedLength}.
     *
     * @param positions requested positions
     * @param positionCount number of leading entries of {@code positions} to use
     * @param dataLength number of bytes to read in batch mode, as computed by prepareForNextRead()
     * @return number of rows consumed from the streams (last requested position + 1)
     * @throws IOException if the data stream fails
     */
    private int readNoFilter(int[] positions, int positionCount, int dataLength)
            throws IOException
    {
        // filter == null implies outputRequired == true

        int totalPositionCount = positions[positionCount - 1] + 1;
        if (useBatchMode(positionCount, totalPositionCount)) {
            if (presentStream == null) {
                if (dataStream != null) {
                    dataStream.next(data, 0, dataLength);
                    convertLengthVectorToOffsetVector(lengthVector, totalPositionCount, offsets);

                    // some rows were not requested: keep only the requested ones
                    if (totalPositionCount > positionCount) {
                        packByteArrayAndOffsets(data, offsets, positions, positionCount);
                    }
                }
            }
            else {
                if (dataStream != null) {
                    dataStream.next(data, 0, dataLength);
                    convertLengthVectorToOffsetVector(lengthVector, isNullVector, totalPositionCount, offsets);
                }

                if (totalPositionCount > positionCount) {
                    packByteArrayOffsetsAndNulls(data, offsets, isNullVector, positions, positionCount);
                }

                if (context.isNullsAllowed()) {
                    System.arraycopy(isNullVector, 0, nulls, 0, positionCount);
                }
            }
            outputPositionCount = positionCount;
            return totalPositionCount;
        }

        int streamPosition = 0;
        for (int i = 0; i < positionCount; i++) {
            int position = positions[i];
            if (position > streamPosition) {
                skipData(streamPosition, position - streamPosition);
                streamPosition = position;
            }

            int offset = offsets[i];
            if (presentStream != null && isNullVector[position]) {
                // a null occupies no bytes; offsets was already dereferenced above, so no null check is needed here
                offsets[i + 1] = offset;
                nulls[i] = true;
            }
            else {
                int length = lengthVector[lengthIndex];
                int truncatedLength = 0;
                if (length > 0) {
                    // the third argument is passed as an end offset (offset + length), matching the batch call above
                    dataStream.next(data, offset, offset + length);
                    truncatedLength = computeTruncatedLength(dataAsSlice, offset, length, maxCodePointCount, isCharType);
                }
                offsets[i + 1] = offset + truncatedLength;
                lengthIndex++;
                if (presentStream != null) {
                    nulls[i] = false;
                }
            }
            streamPosition++;
        }
        outputPositionCount = positionCount;
        return streamPosition;
    }

    /**
     * Reads the requested positions and keeps only those accepted by {@code filter}.
     * <p>
     * In batch mode all bytes up to the last requested position are read at once, the filter is
     * evaluated over them, and (when output is required) the surviving values are packed together.
     * Otherwise rows are visited one by one; bytes of values rejected by the length test are not read
     * but accumulated and skipped lazily before the next value that has to be read.
     * <p>
     * After each row the filter's {@code getPrecedingPositionsToFail()} and
     * {@code getSucceedingPositionsToFail()} are applied: the former retracts already accepted
     * positions, the latter makes the loop jump over the following requested positions.
     *
     * @param positions requested positions
     * @param positionCount number of leading entries of {@code positions} to use
     * @param dataLength number of bytes to read in batch mode, as computed by prepareForNextRead()
     * @return number of rows consumed from the streams
     * @throws IOException if the data stream fails
     */
    private int readWithFilter(int[] positions, int positionCount, int dataLength)
            throws IOException
    {
        boolean outputRequired = context.isOutputRequired();

        int totalPositionCount = positions[positionCount - 1] + 1;
        if (useBatchMode(positionCount, totalPositionCount)) {
            if (dataStream != null) {
                dataStream.next(data, 0, dataLength);
            }

            final int filteredPositionCount;
            if (presentStream == null) {
                filteredPositionCount = evaluateFilter(positions, positionCount);

                if (outputRequired && totalPositionCount > filteredPositionCount && filteredPositionCount > 0 && dataStream != null) {
                    packByteArrayAndOffsets(data, offsets, outputPositions, filteredPositionCount);
                }
            }
            else {
                filteredPositionCount = evaluateFilterWithNull(positions, positionCount);

                if (outputRequired && filteredPositionCount > 0) {
                    if (totalPositionCount > filteredPositionCount) {
                        packByteArrayOffsetsAndNulls(data, offsets, isNullVector, outputPositions, filteredPositionCount);
                    }

                    if (context.isNullsAllowed()) {
                        System.arraycopy(isNullVector, 0, nulls, 0, filteredPositionCount);
                    }
                }
            }

            outputPositionCount = filteredPositionCount;
            return totalPositionCount;
        }

        int streamPosition = 0;
        // bytes of rejected values that still have to be skipped before the next dataStream.next()
        int dataToSkip = 0;

        for (int i = 0; i < positionCount; i++) {
            int position = positions[i];
            if (position > streamPosition) {
                skipData(streamPosition, position - streamPosition);
                streamPosition = position;
            }

            int offset = outputRequired ? offsets[outputPositionCount] : 0;
            if (presentStream != null && isNullVector[position]) {
                if ((context.isNonDeterministicFilter() && filter.testNull()) || context.isNullsAllowed()) {
                    if (outputRequired) {
                        offsets[outputPositionCount + 1] = offset;
                        nulls[outputPositionCount] = true;
                    }
                    outputPositions[outputPositionCount] = position;
                    outputPositionCount++;
                }
            }
            else {
                int length = lengthVector[lengthIndex];
                // without output the buffer is reused from index 0 for every value
                int dataOffset = outputRequired ? offset : 0;
                if (filter.testLength(length)) {
                    if (dataStream != null) {
                        dataStream.skip(dataToSkip);
                        dataToSkip = 0;
                        dataStream.next(data, dataOffset, dataOffset + length);
                        if (filter.testBytes(data, dataOffset, length)) {
                            if (outputRequired) {
                                int truncatedLength = computeTruncatedLength(dataAsSlice, dataOffset, length, maxCodePointCount, isCharType);
                                offsets[outputPositionCount + 1] = offset + truncatedLength;
                                if (context.isNullsAllowed() && presentStream != null) {
                                    nulls[outputPositionCount] = false;
                                }
                            }
                            outputPositions[outputPositionCount] = position;
                            outputPositionCount++;
                        }
                    }
                    else {
                        // no data stream: every value is the empty string
                        assert length == 0;
                        if (filter.testBytes(new byte[0], 0, 0)) {
                            if (outputRequired) {
                                offsets[outputPositionCount + 1] = offset;
                                if (context.isNullsAllowed() && presentStream != null) {
                                    nulls[outputPositionCount] = false;
                                }
                            }
                            outputPositions[outputPositionCount] = position;
                            outputPositionCount++;
                        }
                    }
                }
                else {
                    dataToSkip += length;
                }
                lengthIndex++;
            }

            streamPosition++;

            // filter is never null here: read() dispatches to this method only when a filter is present
            outputPositionCount -= filter.getPrecedingPositionsToFail();
            int succeedingPositionsToFail = filter.getSucceedingPositionsToFail();
            if (succeedingPositionsToFail > 0) {
                // skipData() indexes isNullVector from its start argument, so it must receive the first
                // row of the skipped range, not the stream position reached after the loop below
                int skipStart = streamPosition;
                for (int j = 0; j < succeedingPositionsToFail; j++) {
                    i++;
                    streamPosition = positions[i] + 1;
                }
                skipData(skipStart, streamPosition - skipStart);
            }
        }
        if (dataToSkip > 0) {
            dataStream.skip(dataToSkip);
        }
        return streamPosition;
    }

    /**
     * Handles a read for which no length stream exists; read() takes this path and the result is
     * flagged as all-null.
     * <p>
     * With a non-deterministic filter {@code testNull()} is invoked once per visited position;
     * otherwise either every position or none is accepted, depending on whether nulls are allowed.
     *
     * @param positions requested positions
     * @param positionCount number of leading entries of {@code positions} to use
     * @return number of rows covered (last requested position + 1)
     */
    private int readAllNulls(int[] positions, int positionCount)
    {
        if (context.isNonDeterministicFilter()) {
            outputPositionCount = 0;
            int index = 0;
            while (index < positionCount) {
                if (!filter.testNull()) {
                    outputPositionCount -= filter.getPrecedingPositionsToFail();
                    index += filter.getSucceedingPositionsToFail();
                }
                else {
                    outputPositionCount++;
                }
                index++;
            }
        }
        else {
            outputPositionCount = context.isNullsAllowed() ? positionCount : 0;
        }

        allNulls = true;
        int lastPosition = positions[positionCount - 1];
        return lastPosition + 1;
    }

    /**
     * Advances the streams by {@code items} rows without materializing any value.
     *
     * @param items number of rows to skip
     * @throws IOException if one of the underlying streams fails
     */
    private void skip(int items)
            throws IOException
    {
        if (presentStream == null) {
            // no nulls: every row has a length entry
            long bytesToSkip = lengthStream.sum(items);
            if (dataStream != null) {
                dataStream.skip(bytesToSkip);
            }
            return;
        }

        // in case of an empty varbinary both the presentStream and dataStream are null and only lengthStream is present.
        if (dataStream == null) {
            presentStream.skip(items);
            return;
        }

        // only rows counted by countBitsSet() have a length entry
        int valuesToSkip = presentStream.countBitsSet(items);
        dataStream.skip(lengthStream.sum(valuesToSkip));
    }

    /**
     * Skips the data bytes of {@code items} rows beginning at row {@code start}, using the lengths and
     * null flags already loaded for the current read, and advances {@code lengthIndex} past them.
     *
     * @param start first row to skip; index into {@code isNullVector}
     * @param items number of rows to skip
     * @throws IOException if the data stream fails
     */
    private void skipData(int start, int items)
            throws IOException
    {
        int byteCount = 0;
        int row = start;
        for (int remaining = items; remaining > 0; remaining--) {
            boolean hasValue = presentStream == null || !isNullVector[row];
            if (hasValue) {
                byteCount += lengthVector[lengthIndex++];
            }
            row++;
        }

        // in case of an empty varbinary both the presentStream and dataStream are null and only lengthStream is present.
        if (dataStream == null) {
            return;
        }
        dataStream.skip(byteCount);
    }

    // No nulls
    /**
     * Batch-mode filter evaluation for a read without a present stream.
     * <p>
     * A first pass applies the length test and compacts the surviving positions into
     * {@code outputPositions}; a second pass tests the bytes of those survivors.
     *
     * @param positions requested positions
     * @param positionCount number of leading entries of {@code positions} to use
     * @return number of positions left in {@code outputPositions}
     */
    private int evaluateFilter(int[] positions, int positionCount)
    {
        int lengthSurvivors = 0;
        int index = 0;
        while (index < positionCount) {
            int position = positions[index];
            if (filter.testLength(lengthVector[position])) {
                // compact positions on the fly
                outputPositions[lengthSurvivors] = position;
                lengthSurvivors++;
            }
            else {
                index += filter.getSucceedingPositionsToFail();
                lengthSurvivors -= filter.getPrecedingPositionsToFail();
            }
            index++;
        }

        if (lengthSurvivors <= 0) {
            return 0;
        }

        if (dataStream == null) {
            return testEmptyStrings(outputPositions, lengthSurvivors);
        }

        // offsets are needed only up to the last surviving position
        int rowsCovered = outputPositions[lengthSurvivors - 1] + 1;
        convertLengthVectorToOffsetVector(lengthVector, rowsCovered, offsets);
        return testBytes(outputPositions, lengthSurvivors);
    }

    /**
     * Batch-mode filter evaluation for a read with a present stream.
     * <p>
     * Null rows pass when a non-deterministic filter accepts null or when nulls are allowed; other
     * rows must pass both the length test and the bytes test. Accepted positions are compacted into
     * {@code outputPositions}.
     *
     * @param positions requested positions
     * @param positionCount number of leading entries of {@code positions} to use
     * @return number of positions left in {@code outputPositions}
     */
    private int evaluateFilterWithNull(int[] positions, int positionCount)
    {
        if (dataStream != null) {
            int rowsCovered = positions[positionCount - 1] + 1;
            convertLengthVectorToOffsetVector(lengthVector, isNullVector, rowsCovered, offsets);
        }

        int kept = 0;
        for (int i = 0; i < positionCount; i++) {
            int position = positions[i];

            boolean accepted;
            if (isNullVector[position]) {
                accepted = (context.isNonDeterministicFilter() && filter.testNull()) || context.isNullsAllowed();
            }
            else {
                int start = offsets[position];
                int length = offsets[position + 1] - start;
                accepted = filter.testLength(length) && filter.testBytes(data, start, length);
            }

            if (accepted) {
                // compact positions on the fly
                outputPositions[kept++] = position;
                continue;
            }

            i += filter.getSucceedingPositionsToFail();
            kept -= filter.getPrecedingPositionsToFail();
        }

        return kept;
    }

    /**
     * Applies the bytes test to positions whose value is the empty string.
     * <p>
     * A non-deterministic filter is invoked once per visited position and the survivors are compacted
     * in place; otherwise a single invocation decides for all positions.
     *
     * @param positions candidate positions; compacted in place in the non-deterministic case
     * @param positionCount number of candidates
     * @return number of positions that passed
     */
    private int testEmptyStrings(int[] positions, int positionCount)
    {
        if (!context.isNonDeterministicFilter()) {
            return filter.testBytes("".getBytes(), 0, 0) ? positionCount : 0;
        }

        int kept = 0;
        int index = 0;
        while (index < positionCount) {
            int position = positions[index];

            if (filter.testBytes("".getBytes(), 0, 0)) {
                positions[kept] = position;
                kept++;
            }
            else {
                index += filter.getSucceedingPositionsToFail();
                kept -= filter.getPrecedingPositionsToFail();
            }
            index++;
        }
        return kept;
    }

    /**
     * Applies the bytes test to each candidate position, locating its value in {@code data} through
     * {@code offsets}, and compacts the survivors in place.
     *
     * @param positions candidate positions; compacted in place
     * @param positionCount number of candidates
     * @return number of positions that passed
     */
    private int testBytes(int[] positions, int positionCount)
    {
        int kept = 0;
        int index = 0;
        while (index < positionCount) {
            int position = positions[index];

            int start = offsets[position];
            int end = offsets[position + 1];
            if (filter.testBytes(data, start, end - start)) {
                positions[kept] = position;
                kept++;
            }
            else {
                index += filter.getSucceedingPositionsToFail();
                kept -= filter.getPrecedingPositionsToFail();
            }
            index++;
        }
        return kept;
    }

    /**
     * Returns the internal array of output positions; read() returns how many leading entries are valid.
     * The array itself is returned, not a copy.
     */
    @Override
    public int[] getReadPositions()
    {
        return outputPositions;
    }

    /**
     * Builds a block holding the values at {@code positions} from the last read and hands the
     * underlying buffers over to it: the reader drops its references to them afterwards.
     *
     * @param positions positions to include in the block
     * @param positionCount number of leading entries of {@code positions} to use; at most the number of positions output by read()
     * @return a run-length encoded null block when the last read was all-null, otherwise a variable-width block
     * @throws IllegalArgumentException if the last read produced no positions
     * @throws IllegalStateException if output is not required, too many positions are requested, or a lease is still open
     */
    @Override
    public Block getBlock(int[] positions, int positionCount)
    {
        checkArgument(outputPositionCount > 0, "outputPositionCount must be greater than zero");
        checkState(context.isOutputRequired(), "This stream reader doesn't produce output");
        checkState(positionCount <= outputPositionCount, "Not enough values");
        checkState(!valuesInUse, "BlockLease hasn't been closed yet");

        if (allNulls) {
            Block singleNull = context.getOutputType().createBlockBuilder(null, 1).appendNull().build();
            return new RunLengthEncodedBlock(singleNull, positionCount);
        }

        boolean includeNulls = context.isNullsAllowed() && presentStream != null;

        // only a subset of the read positions is wanted
        if (positionCount != outputPositionCount) {
            compactValues(positions, positionCount, includeNulls);
        }

        Optional<boolean[]> valueIsNull = includeNulls ? Optional.ofNullable(nulls) : Optional.empty();
        Block result = new VariableWidthBlock(positionCount, dataAsSlice, offsets, valueIsNull);

        // the block now references these buffers; let the next read allocate fresh ones
        nulls = null;
        offsets = null;
        data = null;
        dataAsSlice = null;
        return result;
    }

    /**
     * Compacts, in place, the values of the last read down to the ones listed in {@code positions}.
     * Data bytes, offsets, output positions and (optionally) null flags are moved towards the front.
     * <p>
     * NOTE(review): appears to assume {@code positions} is an ordered subset of {@code outputPositions};
     * the assertion below checks the match only when assertions are enabled.
     *
     * @param positions positions to keep
     * @param positionCount number of leading entries of {@code positions} to use
     * @param includeNulls whether the null flags are compacted as well
     */
    private void compactValues(int[] positions, int positionCount, boolean includeNulls)
    {
        int writeIndex = 0;
        int wanted = positions[writeIndex];
        for (int readIndex = 0; readIndex < outputPositionCount; readIndex++) {
            // not requested: drop it
            if (outputPositions[readIndex] < wanted) {
                continue;
            }

            assert outputPositions[readIndex] == wanted;

            int sourceStart = offsets[readIndex];
            int length = offsets[readIndex + 1] - sourceStart;
            int targetStart = offsets[writeIndex];
            if (length > 0) {
                System.arraycopy(data, sourceStart, data, targetStart, length);
            }
            offsets[writeIndex + 1] = targetStart + length;
            outputPositions[writeIndex] = wanted;

            if (includeNulls) {
                nulls[writeIndex] = nulls[readIndex];
            }

            writeIndex++;
            if (writeIndex >= positionCount) {
                break;
            }
            wanted = positions[writeIndex];
        }

        outputPositionCount = positionCount;
    }

    /**
     * Like {@link #getBlock(int[], int)}, but the reader keeps its buffers and the block is handed out
     * under a lease. Until the lease is closed, read(), getBlock() and getBlockView() fail their state checks.
     *
     * @param positions positions to include in the block
     * @param positionCount number of leading entries of {@code positions} to use; at most the number of positions output by read()
     * @return a lease over a run-length encoded null block or a variable-width block
     * @throws IllegalArgumentException if the last read produced no positions
     * @throws IllegalStateException if output is not required, too many positions are requested, or a lease is still open
     */
    @Override
    public BlockLease getBlockView(int[] positions, int positionCount)
    {
        checkArgument(outputPositionCount > 0, "outputPositionCount must be greater than zero");
        checkState(context.isOutputRequired(), "This stream reader doesn't produce output");
        checkState(positionCount <= outputPositionCount, "Not enough values");
        checkState(!valuesInUse, "BlockLease hasn't been closed yet");

        if (allNulls) {
            Block singleNull = context.getOutputType().createBlockBuilder(null, 1).appendNull().build();
            return newLease(new RunLengthEncodedBlock(singleNull, positionCount));
        }

        boolean includeNulls = context.isNullsAllowed() && presentStream != null;

        // only a subset of the read positions is wanted
        if (positionCount != outputPositionCount) {
            compactValues(positions, positionCount, includeNulls);
        }

        Optional<boolean[]> valueIsNull = includeNulls ? Optional.ofNullable(nulls) : Optional.empty();
        Block view = new VariableWidthBlock(positionCount, dataAsSlice, offsets, valueIsNull);
        return newLease(view);
    }

    /**
     * Wraps {@code block} in a lease and marks the reader's values as in use;
     * the callback passed to the lease clears the mark again.
     */
    private BlockLease newLease(Block block)
    {
        valuesInUse = true;
        return ClosingBlockLease.newLease(block, () -> valuesInUse = false);
    }

    /**
     * Intentionally empty: this reader has nothing to do here.
     */
    @Override
    public void throwAnyError(int[] positions, int positionCount)
    {
    }

    /**
     * Drops every buffer held by this reader and closes its memory context.
     * <p>
     * All arrays counted by {@link #getRetainedSizeInBytes()} are released, including {@code nulls}.
     */
    @Override
    public void close()
    {
        dataAsSlice = null;
        data = null;
        lengthVector = null;
        isNullVector = null;
        offsets = null;
        nulls = null;
        outputPositions = null;
        systemMemoryContext.close();
    }

    /**
     * Opens the present, length and data streams from their current sources and fetches the filter.
     * Called lazily by read() on the first read after startStripe() or startRowGroup().
     *
     * @throws IOException if a stream cannot be opened
     */
    private void openRowGroup()
            throws IOException
    {
        presentStream = presentStreamSource.openStream();
        // the filter lookup takes the present stream, so it has to come after opening it
        filter = context.getFilter(presentStream);
        lengthStream = lengthStreamSource.openStream();
        dataStream = dataStreamSource.openStream();

        rowGroupOpen = true;
    }

    /**
     * Resets the reader for a new stripe: all stream sources fall back to the "missing" sources, the
     * open streams and the filter are dropped, and the next read() has to reopen the row group.
     *
     * @param stripe the stripe being started; not used by this reader
     */
    @Override
    public void startStripe(Stripe stripe)
    {
        // forget everything opened for the previous row group
        rowGroupOpen = false;
        readOffset = 0;
        filter = null;
        presentStream = null;
        lengthStream = null;
        dataStream = null;

        // until startRowGroup() provides real ones
        presentStreamSource = getBooleanMissingStreamSource();
        lengthStreamSource = getLongMissingStreamSource();
        dataStreamSource = getByteArrayMissingStreamSource();
    }

    /**
     * Resets the reader for a new row group: the PRESENT, LENGTH and DATA stream sources of this
     * column are looked up in {@code dataStreamSources}, the open streams and the filter are dropped,
     * and the next read() has to reopen the row group.
     *
     * @param dataStreamSources stream sources of the row group
     */
    @Override
    public void startRowGroup(InputStreamSources dataStreamSources)
    {
        StreamDescriptor column = context.getStreamDescriptor();
        presentStreamSource = dataStreamSources.getInputStreamSource(column, PRESENT, BooleanInputStream.class);
        lengthStreamSource = dataStreamSources.getInputStreamSource(column, LENGTH, LongInputStream.class);
        dataStreamSource = dataStreamSources.getInputStreamSource(column, DATA, ByteArrayInputStream.class);

        // forget everything opened for the previous row group
        rowGroupOpen = false;
        readOffset = 0;
        filter = null;
        presentStream = null;
        lengthStream = null;
        dataStream = null;
    }

    /**
     * Returns the instance size plus the sizes of the six buffers currently held
     * (offsets, output positions, data, nulls, lengths and null flags).
     */
    @Override
    public long getRetainedSizeInBytes()
    {
        long retained = INSTANCE_SIZE;
        retained += sizeOf(offsets);
        retained += sizeOf(outputPositions);
        retained += sizeOf(data);
        retained += sizeOf(nulls);
        retained += sizeOf(lengthVector);
        retained += sizeOf(isNullVector);
        return retained;
    }

    /**
     * Test hook: drops the data stream so the code paths that handle a missing data stream can be exercised.
     */
    @VisibleForTesting
    public void resetDataStream()
    {
        dataStream = null;
    }

    /**
     * Loads null flags and lengths for all rows up to the last requested position and sizes the
     * buffers for the upcoming read.
     * <p>
     * In batch mode the byte total covers every non-null row; otherwise only the requested rows are
     * counted. The total is accumulated in a {@code long} so that a very large batch cannot wrap
     * around and slip past the 1GB guard.
     *
     * @param positionCount number of leading entries of {@code positions} to use
     * @param positions requested positions
     * @return number of bytes the caller has to provision for: the total when output is required or in
     *         batch mode, otherwise the length of the longest requested value
     * @throws IOException if the present or length stream fails
     * @throws GenericInternalException if the requested values total more than 1GB
     */
    private int prepareForNextRead(int positionCount, int[] positions)
            throws IOException
    {
        lengthIndex = 0;
        outputPositionCount = 0;

        // long, not int: the sum of many large values may exceed Integer.MAX_VALUE
        long totalLength = 0;
        int maxLength = 0;
        int dataLength = 0;

        int totalPositions = positions[positionCount - 1] + 1;
        int nullCount = 0;
        if (presentStream != null) {
            isNullVector = ensureCapacity(isNullVector, totalPositions);
            nullCount = presentStream.getUnsetBits(totalPositions, isNullVector);
        }

        if (lengthStream != null) {
            // only non-null rows have an entry in the length stream
            int nonNullCount = totalPositions - nullCount;
            lengthVector = ensureCapacity(lengthVector, nonNullCount);
            lengthStream.next(lengthVector, nonNullCount);

            if (useBatchMode(positionCount, totalPositions)) {
                for (int i = 0; i < nonNullCount; i++) {
                    totalLength += lengthVector[i];
                    maxLength = Math.max(maxLength, lengthVector[i]);
                }
            }
            else {
                int positionIndex = 0;
                // walks lengthVector in step with the non-null rows; deliberately distinct from the lengthIndex field
                int nonNullIndex = 0;
                for (int i = 0; i < totalPositions; i++) {
                    boolean isNotNull = nullCount == 0 || !isNullVector[i];
                    if (i == positions[positionIndex]) {
                        if (isNotNull) {
                            totalLength += lengthVector[nonNullIndex];
                            maxLength = Math.max(maxLength, lengthVector[nonNullIndex]);
                            nonNullIndex++;
                        }
                        positionIndex++;
                    }
                    else if (isNotNull) {
                        nonNullIndex++;
                    }
                }
            }

            // TODO Do not throw if outputRequired == false
            if (totalLength > ONE_GIGABYTE) {
                throw new GenericInternalException(
                        format("Values in column \"%s\" are too large to process for Presto. %s column values are larger than 1GB [%s]",
                                context.getStreamDescriptor().getFieldName(), positionCount,
                                context.getStreamDescriptor().getOrcDataSourceId()));
            }
        }

        // cannot fail: totalLength is at most ONE_GIGABYTE at this point
        int totalDataLength = toIntExact(totalLength);

        if (context.isOutputRequired()) {
            if (presentStream != null && context.isNullsAllowed()) {
                nulls = ensureCapacity(nulls, positionCount);
            }
            dataLength = totalDataLength;
            data = ensureCapacity(data, totalDataLength);
            offsets = ensureCapacity(offsets, totalPositions + 1, SMALL, INITIALIZE);
        }
        else {
            if (useBatchMode(positionCount, totalPositions)) {
                dataLength = totalDataLength;
                if (filter != null) {
                    offsets = ensureCapacity(offsets, totalPositions + 1, SMALL, INITIALIZE);
                }
            }
            else {
                // values are tested one at a time, so the buffer only has to hold the longest one
                dataLength = maxLength;
            }

            data = ensureCapacity(data, dataLength);
        }

        dataAsSlice = Slices.wrappedBuffer(data);
        return dataLength;
    }

    /**
     * Decides whether the current read should use batch mode, i.e. read all bytes up to the last
     * requested position at once.
     * <p>
     * Batch mode is never used without a length stream or for length-bounded types; otherwise the
     * decision depends on the fraction of rows that were not requested.
     *
     * @param positionCount number of requested positions
     * @param totalPositionCount number of rows spanned by the request (last requested position + 1)
     * @return true when batch mode should be used
     */
    private boolean useBatchMode(int positionCount, int totalPositionCount)
    {
        // maxCodePointCount < 0 means it's unbounded varchar VARCHAR.
        // If the types are VARCHAR(N) or CHAR(N), the length of the string need to be calculated and truncated.
        boolean boundedType = maxCodePointCount >= 0;
        if (lengthStream == null || boundedType) {
            return false;
        }

        double inputFilterRate = (double) (totalPositionCount - positionCount) / totalPositionCount;

        if (filter == null) {  // readNoFilter
            // When there is no filter, batch mode performs better for almost all inputFilterRate.
            // But to limit data buffer size, we enable it for the range of [0.0f, 0.5f]
            return inputFilterRate >= 0.0f && inputFilterRate <= 0.5f;
        }

        // readWithFilter
        // When there is filter, batch mode performs better for almost all inputFilterRate except when inputFilterRate is around 0.1f.
        // To limit data buffer size, we enable it for the range of [0.0f, 0.05f] and [0.15f, 0.5f]
        boolean inLowBand = inputFilterRate >= 0.0f && inputFilterRate <= 0.05f;
        return inLowBand || (inputFilterRate >= 0.15f && inputFilterRate <= 0.5f);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy