io.deephaven.engine.table.iterators.ChunkedColumnIterator Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of deephaven-engine-api Show documentation
Engine API: Engine API module, suitable as a compile-time dependency for most queries
The newest version!
//
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
package io.deephaven.engine.table.iterators;

import io.deephaven.base.verify.Require;
import io.deephaven.chunk.Chunk;
import io.deephaven.chunk.ChunkType;
import io.deephaven.chunk.attributes.Any;
import io.deephaven.engine.rowset.RowSequence;
import io.deephaven.engine.table.ChunkSource;
import io.deephaven.util.SafeCloseable;
import org.jetbrains.annotations.NotNull;

import java.util.NoSuchElementException;

import static io.deephaven.chunk.util.pools.ChunkPoolConstants.SMALLEST_POOLED_CHUNK_CAPACITY;

/**
 * Iteration support for values supplied by a {@link ChunkSource}. Implementations retrieve {@link Chunk chunks} of
 * values at a time in a common Deephaven engine retrieval pattern. This is expected to be high throughput relative to
 * {@link SerialColumnIterator} implementations, but may have material initialization and teardown costs for small or
 * sparse iterations.
 */
public abstract class ChunkedColumnIterator>
        implements ColumnIterator {

    /**
     * The default for {@code chunkSize} used by constructors that don't accept an explicit size.
     */
    public static final int DEFAULT_CHUNK_SIZE = 1 << 11; // This is the block size for ArrayBackedColumnSource

    private final int chunkSize;

    private ChunkSource chunkSource;
    private ChunkSource.GetContext getContext;
    private RowSequence.Iterator rowKeyIterator;
    private long remainingRowKeys;

    CHUNK_TYPE currentData;
    int currentOffset;

    /**
     * Create a new ChunkedColumnIterator.
     *
     * @param chunkSource The {@link ChunkSource} to fetch values from
     * @param rowSequence The {@link RowSequence} to iterate over
     * @param chunkSize The internal buffer size to use when fetching data
     * @param firstRowKey The first row key from {@code rowSequence} to iterate
     * @param length The total number of rows to iterate
     */
    ChunkedColumnIterator(
            @NotNull final ChunkSource chunkSource,
            @NotNull final RowSequence rowSequence,
            final int chunkSize,
            final long firstRowKey,
            final long length) {
        this.chunkSize = Require.gtZero(chunkSize, "chunkSize");

        this.chunkSource = chunkSource;
        getContext = chunkSource.makeGetContext(chunkSize);
        rowKeyIterator = rowSequence.getRowSequenceIterator();
        final long consumed = rowKeyIterator.advanceAndGetPositionDistance(firstRowKey);
        if (rowKeyIterator.peekNextKey() != firstRowKey) {
            throw new IllegalArgumentException(String.format(
                    "Invalid first row key %d, not present in iteration row sequence", firstRowKey));
        }
        if (rowSequence.size() - consumed < length) {
            throw new IllegalArgumentException(String.format(
                    "Invalid length %d, iteration row sequence only contains %d rows (%d already consumed)",
                    length, rowSequence.size(), consumed));
        }
        remainingRowKeys = length;

        currentData = null;
        currentOffset = Integer.MAX_VALUE;
    }

    static ChunkSource validateChunkType(
            @NotNull final ChunkSource chunkSource,
            @NotNull final ChunkType expectedChunkType) {
        final ChunkType chunkType = chunkSource.getChunkType();
        if (chunkType != expectedChunkType) {
            throw new IllegalArgumentException("Illegal chunk type " + chunkType + ", expected " + expectedChunkType);
        }
        return chunkSource;
    }

    @Override
    public final long remaining() {
        return remainingRowKeys + (currentData == null ? 0 : currentData.size() - currentOffset);
    }

    @Override
    public final boolean hasNext() {
        if ((currentData == null || currentOffset >= currentData.size())
                && remainingRowKeys <= 0) {
            close();
            return false;
        }
        return true;
    }

    /**
     * Maybe advance this ChunkedColumnIterator if necessary (that is, if there is no current chunk or no remaining
     * elements in the current chunk), by reading the next chunk and setting {@link #currentData} and
     * {@link #currentOffset} accordingly.
     * 
     * @throws NoSuchElementException If this ChunkedColumnIterator is exhausted
     */
    final void maybeAdvance() {
        if (currentData == null || currentOffset >= currentData.size()) {
            if (remainingRowKeys <= 0) {
                close();
                throw new NoSuchElementException();
            }
            final RowSequence currentRowKeys =
                    rowKeyIterator.getNextRowSequenceWithLength(Math.min(chunkSize, remainingRowKeys));
            remainingRowKeys -= currentRowKeys.size();
            currentData = castChunk(chunkSource.getChunk(getContext, currentRowKeys));
            currentOffset = 0;
        }
    }

    /**
     * Cast {@code chunk} to the appropriate class for this implementation.
     *
     * @param chunk The {@link Chunk} to cast
     * @return {@code chunk} with the appropriate cast applied
     */
    abstract CHUNK_TYPE castChunk(@NotNull final Chunk chunk);

    /**
     * Invoke {@code consumeCurrentChunk} to consume all data in each remaining chunk of data in this
     * ChunkedColumnIterator.
     *
     * @param consumeCurrentChunk The procedure to invoke. Must result in {@code currentOffset == currentData.size()}.
     *        Takes no arguments, because this method is only called by tightly-coupled classes with access to
     *        {@link #currentData} and {@link #currentOffset}.
     */
    final void consumeRemainingByChunks(@NotNull final Runnable consumeCurrentChunk) {
        while (hasNext()) {
            maybeAdvance();
            consumeCurrentChunk.run();
        }
    }

    @Override
    public final void close() {
        // @formatter:off
        try (final SafeCloseable ignored1 = getContext;
             final SafeCloseable ignored2 = rowKeyIterator) {
            // @formatter:on
            chunkSource = null;
            getContext = null;
            rowKeyIterator = null;
            remainingRowKeys = 0;
            currentData = null;
            currentOffset = Integer.MAX_VALUE;
        }
    }

    public static  ColumnIterator make(
            @NotNull final ChunkSource chunkSource,
            @NotNull final RowSequence rowSequence) {
        return make(chunkSource, rowSequence, DEFAULT_CHUNK_SIZE);
    }

    public static  ColumnIterator make(
            @NotNull final ChunkSource chunkSource,
            @NotNull final RowSequence rowSequence,
            int chunkSize) {
        chunkSize = Math.max((int) Math.min(chunkSize, rowSequence.size()), SMALLEST_POOLED_CHUNK_CAPACITY);
        final ColumnIterator result;
        switch (chunkSource.getChunkType()) {
            case Char:
                result = new ChunkedCharacterColumnIterator(
                        chunkSource, rowSequence, chunkSize, rowSequence.firstRowKey(), rowSequence.size());
                break;
            case Byte:
                result = new ChunkedByteColumnIterator(
                        chunkSource, rowSequence, chunkSize, rowSequence.firstRowKey(), rowSequence.size());
                break;
            case Short:
                result = new ChunkedShortColumnIterator(
                        chunkSource, rowSequence, chunkSize, rowSequence.firstRowKey(), rowSequence.size());
                break;
            case Int:
                result = new ChunkedIntegerColumnIterator(
                        chunkSource, rowSequence, chunkSize, rowSequence.firstRowKey(), rowSequence.size());
                break;
            case Long:
                result = new ChunkedLongColumnIterator(
                        chunkSource, rowSequence, chunkSize, rowSequence.firstRowKey(), rowSequence.size());
                break;
            case Float:
                result = new ChunkedFloatColumnIterator(
                        chunkSource, rowSequence, chunkSize, rowSequence.firstRowKey(), rowSequence.size());
                break;
            case Double:
                result = new ChunkedDoubleColumnIterator(
                        chunkSource, rowSequence, chunkSize, rowSequence.firstRowKey(), rowSequence.size());
                break;
            case Object:
                result = new ChunkedObjectColumnIterator<>(
                        chunkSource, rowSequence, chunkSize, rowSequence.firstRowKey(), rowSequence.size());
                break;
            case Boolean:
            default:
                throw new UnsupportedOperationException("Unexpected chunk type: " + chunkSource.getChunkType());
        }
        // noinspection unchecked
        return (ColumnIterator) result;
    }
}