/**
* Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending
*/
package io.deephaven.engine.table.impl.by;
import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.attributes.ChunkLengths;
import io.deephaven.chunk.attributes.ChunkPositions;
import io.deephaven.chunk.attributes.Values;
import io.deephaven.engine.table.*;
import io.deephaven.engine.rowset.RowSequence;
import io.deephaven.engine.rowset.RowSet;
import io.deephaven.engine.rowset.RowSequenceFactory;
import io.deephaven.engine.rowset.impl.ShiftedRowSequence;
import io.deephaven.engine.table.impl.MatchPair;
import io.deephaven.engine.table.impl.QueryTable;
import io.deephaven.engine.table.ChunkSink;
import io.deephaven.chunk.*;
import io.deephaven.engine.rowset.chunkattributes.OrderedRowKeys;
import io.deephaven.engine.rowset.chunkattributes.RowKeys;
import io.deephaven.util.SafeCloseableList;
import org.jetbrains.annotations.NotNull;
/**
* A firstBy aggregation operator for blink tables.
*
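* <p>
* A sketch of the user-facing call this operator typically backs (names are illustrative; {@code source} is assumed
* to be a blink table with a {@code Sym} column):
*
* <pre>{@code
* Table firstPerSym = source.firstBy("Sym");
* }</pre>
*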
* @see Table#BLINK_TABLE_ATTRIBUTE
*/
public class BlinkFirstChunkedOperator extends BaseBlinkFirstOrLastChunkedOperator {
/**
* The next destination slot that we expect to be used.
* <p>
* Any destination less than this one can safely be ignored while processing adds, since the first row can never
* change once a destination has been created, given that we ignore removes.
*/
private long nextDestination;
/**
* The first destination that we used on the current step (if we used any). At the very beginning of a step, this is
* equivalent to {@link #nextDestination} and also the result table's size.
* <p>
* We use this as an offset shift for {@code redirections}, so that {@code redirections} only needs to hold first
* source keys for newly-added destinations, rather than the entire space.
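* For example (illustrative numbers): if 100 destinations already exist and this step adds destinations 100 through
* 104, {@code redirections} only needs 5 slots, each addressed as {@code destination - firstDestinationThisStep}.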
* <p>
* At the end of a step, this is updated to prepare for the next step.
*/
private long firstDestinationThisStep;
BlinkFirstChunkedOperator(@NotNull final MatchPair[] resultPairs, @NotNull final Table blinkTable) {
super(resultPairs, blinkTable);
}
@Override
public final boolean unchunkedRowSet() {
return true;
}
@Override
public final void startTrackingPrevValues() {
// We never change the value at any key in outputColumns since there are no removes; consequently there's no
// need to enable previous value tracking.
}
@Override
public void ensureCapacity(final long tableSize) {
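// Capacity is relative to firstDestinationThisStep because redirections only holds entries for destinations
// added during the current step.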
redirections.ensureCapacity(tableSize - firstDestinationThisStep, false);
}
@Override
public void addChunk(final BucketedContext bucketedContext,
final Chunk<? extends Values> values, // Unused
@NotNull final LongChunk<? extends RowKeys> inputRowKeys,
@NotNull final IntChunk<RowKeys> destinations,
@NotNull final IntChunk<ChunkPositions> startPositions,
final IntChunk<ChunkLengths> length, // Unused
@NotNull final WritableBooleanChunk<Values> stateModified) {
final StreamFirstBucketedContext context = (StreamFirstBucketedContext) bucketedContext;
long maxDestination = nextDestination - 1;
// we can essentially do a radix sort; anything less than nextDestination is not of interest; everything else
// must fall between nextDestination and our chunk size
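// Seed the scratch row keys with MAX_VALUE so the Math.min below keeps the smallest (first) row key seen for
// each newly-encountered destination.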
context.rowKeyToInsert.fillWithValue(0, startPositions.size(), Long.MAX_VALUE);
for (int ii = 0; ii < startPositions.size(); ++ii) {
final int startPosition = startPositions.get(ii);
final int destination = destinations.get(startPosition);
if (destination >= nextDestination) {
Assert.lt(destination, "destination", nextDestination + startPositions.size(),
"nextDestination + startPositions.size()");
maxDestination = Math.max(destination, maxDestination);
final long inputRowKey = inputRowKeys.get(startPosition);
final int index = (int) (destination - nextDestination);
context.destinationsToInsert.set(index, destination);
context.rowKeyToInsert.set(index, Math.min(context.rowKeyToInsert.get(index), inputRowKey));
}
}
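// Only destinations in [nextDestination, maxDestination] are new in this chunk; trim the scratch chunks to that
// range before recording each new destination's first row key.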
context.destinationsToInsert.setSize((int) (maxDestination - nextDestination + 1));
context.rowKeyToInsert.setSize((int) (maxDestination - nextDestination + 1));
for (int ii = 0; ii < context.destinationsToInsert.size(); ++ii) {
final int destination = context.destinationsToInsert.get(ii);
final long rowKey = context.rowKeyToInsert.get(ii);
redirections.set(destination - firstDestinationThisStep, rowKey);
}
nextDestination = maxDestination + 1;
}
@Override
public boolean addChunk(final SingletonContext context, // Unused
final int chunkSize,
final Chunk<? extends Values> values, // Unused
@NotNull final LongChunk<? extends RowKeys> inputRowKeys,
final long destination) {
if (chunkSize == 0) {
return false;
}
return maybeAssignFirst(destination, inputRowKeys.get(0));
}
@Override
public boolean addRowSet(final SingletonContext context,
@NotNull final RowSet rowSet,
final long destination) {
if (rowSet.isEmpty()) {
return false;
}
return maybeAssignFirst(destination, rowSet.firstRowKey());
}
private boolean maybeAssignFirst(final long destination, final long sourceIndexKey) {
if (destination < nextDestination) {
// Skip anything that's not new, it cannot change the first key
return false;
}
if (destination == nextDestination) {
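// A brand-new destination: record its first source row key, offset into this step's redirections space.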
redirections.set(nextDestination++ - firstDestinationThisStep, sourceIndexKey);
} else {
// noinspection ThrowableNotThrown
Assert.statementNeverExecuted(
"Destination " + destination + " greater than next destination " + nextDestination);
}
return true;
}
@Override
public void propagateInitialState(@NotNull final QueryTable resultTable, int startingDestinationsCount) {
copyStreamToResult(resultTable.getRowSet());
redirections = null;
Assert.eq(resultTable.size(), "resultTable.size()", nextDestination, "nextDestination");
firstDestinationThisStep = nextDestination;
}
@Override
public void propagateUpdates(@NotNull final TableUpdate downstream,
@NotNull final RowSet newDestinations) {
// NB: We cannot assert no modifies; other operators in the same aggregation might modify columns not in our
// result set.
Assert.assertion(downstream.removed().isEmpty() && downstream.shifted().empty(),
"downstream.removed().isEmpty() && downstream.shifted().empty()");
copyStreamToResult(downstream.added());
redirections = null;
if (downstream.added().isNonempty()) {
Assert.eq(downstream.added().lastRowKey() + 1, "downstream.added().lastRowKey() + 1", nextDestination,
"nextDestination");
firstDestinationThisStep = nextDestination;
}
}
/**
* <p>
* For each destination slot, map to the (first) source row key and copy source values to destination slots for all
* result columns.
* <p>
* This implementation proceeds chunk-wise in the following manner:
* <ol>
* <li>Get a chunk of destination slots</li>
* <li>Get a chunk of source indices</li>
* <li>For each input column: get a chunk of input values and then fill the output column</li>
* </ol>
*
* @param destinations The added destination slots as a {@link RowSequence}
*/
private void copyStreamToResult(@NotNull final RowSequence destinations) {
try (final SafeCloseableList toClose = new SafeCloseableList()) {
final RowSequence.Iterator destinationsIterator = toClose.add(destinations.getRowSequenceIterator());
final ShiftedRowSequence shiftedSliceDestinations = toClose.add(new ShiftedRowSequence());
final ChunkSource.GetContext redirectionsContext =
toClose.add(redirections.makeGetContext(COPY_CHUNK_SIZE));
final SharedContext inputSharedContext = toClose.add(SharedContext.makeSharedContext());
final ChunkSource.GetContext[] inputContexts =
toClose.addArray(new ChunkSource.GetContext[numResultColumns]);
final ChunkSink.FillFromContext[] outputContexts =
toClose.addArray(new ChunkSink.FillFromContext[numResultColumns]);
for (int ci = 0; ci < numResultColumns; ++ci) {
inputContexts[ci] = inputColumns[ci].makeGetContext(COPY_CHUNK_SIZE, inputSharedContext);
final WritableColumnSource<?> outputColumn = outputColumns[ci];
outputContexts[ci] = outputColumn.makeFillFromContext(COPY_CHUNK_SIZE);
outputColumn.ensureCapacity(destinations.lastRowKey() + 1, false);
}
while (destinationsIterator.hasMore()) {
final RowSequence sliceDestinations =
destinationsIterator.getNextRowSequenceWithLength(COPY_CHUNK_SIZE);
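// Shift the destination keys into redirections' key space, which is offset by firstDestinationThisStep.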
shiftedSliceDestinations.reset(sliceDestinations, -firstDestinationThisStep);
final LongChunk<OrderedRowKeys> sourceIndices = Chunk.<Values, OrderedRowKeys>downcast(
redirections.getChunk(redirectionsContext, shiftedSliceDestinations)).asLongChunk();
try (final RowSequence sliceSources = RowSequenceFactory.wrapRowKeysChunkAsRowSequence(sourceIndices)) {
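// Copy the first-row values from each input column into the corresponding output column slots.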
for (int ci = 0; ci < numResultColumns; ++ci) {
final Chunk extends Values> inputChunk =
inputColumns[ci].getChunk(inputContexts[ci], sliceSources);
outputColumns[ci].fillFromChunk(outputContexts[ci], inputChunk, sliceDestinations);
}
inputSharedContext.reset();
}
}
}
}
private static class StreamFirstBucketedContext implements BucketedContext {
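// Scratch chunks, sized to the bucketed context size: the new destination slots seen in a chunk, and the
// minimum (first) input row key observed for each.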
final WritableIntChunk<RowKeys> destinationsToInsert;
final WritableLongChunk<RowKeys> rowKeyToInsert;
public StreamFirstBucketedContext(int size) {
destinationsToInsert = WritableIntChunk.makeWritableChunk(size);
rowKeyToInsert = WritableLongChunk.makeWritableChunk(size);
}
@Override
public void close() {
destinationsToInsert.close();
rowKeyToInsert.close();
}
}
@Override
public BucketedContext makeBucketedContext(int size) {
return new StreamFirstBucketedContext(size);
}
}