All downloads are free. Search and download functionality uses the official Maven repository.

io.deephaven.engine.table.impl.updateby.rollinggroup.RollingGroupOperator Maven / Gradle / Ivy

There is a newer version: 0.37.1
Show newest version
package io.deephaven.engine.table.impl.updateby.rollinggroup;

import io.deephaven.base.ringbuffer.LongRingBuffer;
import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.*;
import io.deephaven.chunk.attributes.Values;
import io.deephaven.engine.rowset.RowSequence;
import io.deephaven.engine.rowset.RowSet;
import io.deephaven.engine.rowset.chunkattributes.OrderedRowKeys;
import io.deephaven.engine.table.*;
import io.deephaven.engine.table.impl.MatchPair;
import io.deephaven.engine.table.impl.QueryTable;
import io.deephaven.engine.table.impl.sources.*;
import io.deephaven.engine.table.impl.sources.aggregate.*;
import io.deephaven.engine.table.impl.updateby.UpdateByOperator;
import io.deephaven.engine.table.impl.util.ChunkUtils;
import io.deephaven.engine.table.impl.util.RowRedirection;
import io.deephaven.util.SafeCloseable;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import java.util.HashMap;
import java.util.Map;

import static io.deephaven.util.QueryConstants.NULL_INT;
import static io.deephaven.util.QueryConstants.NULL_LONG;

public class RollingGroupOperator extends UpdateByOperator {
    /**
     * Store input/output column information for retrieval.
     */
    private final String[] inputColumnNames;
    private final String[] outputColumnNames;
    private final ColumnSource[] outputSources;
    private final Map> outputSourceMap;


    /** Store a mapping from row keys to bucket RowSets */
    protected final WritableColumnSource groupRowSetSource;
    protected final ObjectArraySource innerGroupRowSetSource;

    /**
     * These sources will retain the position offsets from the current row position for the source and end of this
     * window. A primary benefit of storing offsets in position space vs. row keys is eliminating the need to shift keys
     * stored inside these column sources. Also, any insertion/modification/remove that occurs within this specified
     * window will trigger re-computation of these position offsets.
     * 

* NOTE: these offsets are inclusive and are stored as simple relative positions where start==0 or end==0 implies * the current row is included in the window. A start/end range of [-5,-3] defines a window including exactly 3 rows * beginning 5 rows earlier than this one and continuing through 3 rows earlier than this (inclusive). A range of * [0,0] contains exactly the current row. */ protected final WritableColumnSource startSource; protected final LongArraySource innerStartSource; protected final WritableColumnSource endSource; protected final LongArraySource innerEndSource; private ModifiedColumnSet.Transformer inputOutputTransformer; private ModifiedColumnSet[] outputModifiedColumnSets; public class Context extends UpdateByOperator.Context { private static final int BUFFER_INITIAL_CAPACITY = 512; public final ChunkSink.FillFromContext groupRowSetSourceFillFromContext; public final WritableObjectChunk groupRowSetSourceOutputValues; public final ChunkSink.FillFromContext startSourceFillFromContext; public final WritableLongChunk startSourceOutputValues; public final ChunkSink.FillFromContext endSourceFillFromContext; public final WritableLongChunk endSourceOutputValues; private final LongRingBuffer windowKeys; private long startPos = NULL_LONG; private long endPos = NULL_LONG; protected Context(final int chunkSize) { groupRowSetSourceFillFromContext = groupRowSetSource.makeFillFromContext(chunkSize); groupRowSetSourceOutputValues = WritableObjectChunk.makeWritableChunk(chunkSize); if (timestampColumnName != null) { startSourceFillFromContext = startSource.makeFillFromContext(chunkSize); startSourceOutputValues = WritableLongChunk.makeWritableChunk(chunkSize); endSourceFillFromContext = endSource.makeFillFromContext(chunkSize); endSourceOutputValues = WritableLongChunk.makeWritableChunk(chunkSize); windowKeys = new LongRingBuffer(BUFFER_INITIAL_CAPACITY, true); } else { startSourceFillFromContext = null; startSourceOutputValues = null; endSourceFillFromContext = 
null; endSourceOutputValues = null; windowKeys = null; } } @Override public void close() { SafeCloseable.closeAll( groupRowSetSourceFillFromContext, groupRowSetSourceOutputValues, startSourceFillFromContext, startSourceOutputValues, endSourceFillFromContext, endSourceOutputValues); } @Override public void accumulateCumulative(RowSequence inputKeys, Chunk[] valueChunkArr, LongChunk tsChunk, int len) { throw new IllegalStateException("accumulateCumulative() is invalid for RollingGroupOperator"); } @Override public void accumulateRolling(RowSequence inputKeys, Chunk[] influencerValueChunkArr, LongChunk affectedPosChunk, LongChunk influencerPosChunk, IntChunk pushChunk, IntChunk popChunk, int len) { if (timestampColumnName == null) { // The only work for ticks operators is to update the groupRowSetSource groupRowSetSource.fillFromChunk(groupRowSetSourceFillFromContext, groupRowSetSourceOutputValues, inputKeys); return; } setPosChunks(affectedPosChunk, influencerPosChunk); int pushIndex = 0; // chunk processing for (int ii = 0; ii < len; ii++) { final int pushCount = pushChunk.get(ii); final int popCount = popChunk.get(ii); if (pushCount == NULL_INT) { // Setting NULL_LONG, NULL_LONG marks this as an invalid row. 
startSourceOutputValues.set(ii, NULL_LONG); endSourceOutputValues.set(ii, NULL_LONG); continue; } // pop for this row if (popCount > 0) { pop(popCount); } // push for this row if (pushCount > 0) { push(pushIndex, pushCount); pushIndex += pushCount; } // write the results to the output chunk writeToOutputChunk(ii); } // chunk output to column writeToOutputColumn(inputKeys); } @Override public void setValueChunks(@NotNull final Chunk[] valueChunks) { // NOP } @Override public void push(int pos, int count) { windowKeys.ensureRemaining(count); for (int ii = 0; ii < count; ii++) { endPos = influencerPosChunk.get(pos + ii); windowKeys.addUnsafe(endPos); if (startPos == NULL_LONG) { startPos = endPos; } } } @Override public void pop(int count) { Assert.geq(windowKeys.size(), "windowKeys.size()", count); for (int ii = 0; ii < count; ii++) { windowKeys.removeUnsafe(); if (!windowKeys.isEmpty()) { startPos = windowKeys.front(); } else { startPos = NULL_LONG; endPos = NULL_LONG; } } } @Override public void writeToOutputChunk(int outIdx) { if (startPos == NULL_LONG) { // Setting NULL_LONG, 0 signifies empty window startSourceOutputValues.set(outIdx, NULL_LONG); endSourceOutputValues.set(outIdx, 0); } else { final long affectedPos = affectedPosChunk.get(outIdx); startSourceOutputValues.set(outIdx, startPos - affectedPos); // Store endPos as an exclusive value by incrementing by one endSourceOutputValues.set(outIdx, endPos - affectedPos + 1); } } @Override public void reset() { startPos = NULL_LONG; endPos = NULL_LONG; if (windowKeys != null) { windowKeys.clear(); } } @Override public void writeToOutputColumn(@NotNull RowSequence inputKeys) { startSource.fillFromChunk(startSourceFillFromContext, startSourceOutputValues, inputKeys); endSource.fillFromChunk(endSourceFillFromContext, endSourceOutputValues, inputKeys); groupRowSetSource.fillFromChunk(groupRowSetSourceFillFromContext, groupRowSetSourceOutputValues, inputKeys); } } public RollingGroupOperator(@NotNull final 
MatchPair[] pairs, @NotNull final String[] affectingColumns, @Nullable final RowRedirection rowRedirection, @Nullable final String timestampColumnName, final long reverseWindowScaleUnits, final long forwardWindowScaleUnits, @NotNull final ColumnSource[] valueSources // region extra-constructor-args // endregion extra-constructor-args ) { super(pairs[0], affectingColumns, rowRedirection, timestampColumnName, reverseWindowScaleUnits, forwardWindowScaleUnits, true); inputColumnNames = new String[pairs.length]; outputColumnNames = new String[pairs.length]; outputSources = new ColumnSource[pairs.length]; outputSourceMap = new HashMap<>(); // For the sake of rolling group operators, we need to map from every row to its bucket row set (or the // non-null version) and to the start and end ranges of keys for the window slice. If we are using // redirection, use it for these structures as well. if (rowRedirection != null) { innerGroupRowSetSource = new ObjectArraySource<>(RowSet.class); groupRowSetSource = WritableRedirectedColumnSource.maybeRedirect(rowRedirection, innerGroupRowSetSource, 0); } else { groupRowSetSource = new ObjectSparseArraySource<>(RowSet.class); innerGroupRowSetSource = null; } // If we are computing through ticks, we won't need sources for start and end. 
if (timestampColumnName == null) { innerStartSource = null; innerEndSource = null; startSource = null; endSource = null; } else { // We are computing using the timestamp columns, create the appropriate sources (potentially redirected) if (rowRedirection == null) { startSource = new LongSparseArraySource(); endSource = new LongSparseArraySource(); innerStartSource = null; innerEndSource = null; } else { innerStartSource = new LongArraySource(); innerEndSource = new LongArraySource(); startSource = WritableRedirectedColumnSource.maybeRedirect(rowRedirection, innerStartSource, 0); endSource = WritableRedirectedColumnSource.maybeRedirect(rowRedirection, innerEndSource, 0); } } for (int ii = 0; ii < pairs.length; ii++) { final MatchPair pair = pairs[ii]; final Class csType = valueSources[ii].getType(); inputColumnNames[ii] = pair.rightColumn; outputColumnNames[ii] = pair.leftColumn; // When timestampColumnName == null, we have a tick-based rolling window. RollingOpSpec accepts fwd/rev // tick parameters and applies the constraint that the current row belongs to the reverse window. This // implies that to create a group containing exactly the current row, you must provide a rev/fwd range of // [1, 0]. This constraint is useful for the user but should not propagate to the general purpose // output AggregateColumnSource constructors. We are converting the Rolling window range of values to a // simple +/- relative positional offset range where [0, 0] implies a group that contains only the current // row. Similarly, a user range of [5, -3] will convert to [-4, -2) and in both cases will be a group of // two rows starting 4 rows before the current row. // // The aggregated column source range is half-open, so we add one to the inclusive fwd units to convert. outputSources[ii] = timestampColumnName != null ? 
AggregateColumnSource.makeSliced((ColumnSource) valueSources[ii], groupRowSetSource, startSource, endSource) : AggregateColumnSource.makeSliced((ColumnSource) valueSources[ii], groupRowSetSource, -reverseWindowScaleUnits + 1, forwardWindowScaleUnits + 1); outputSourceMap.put(outputColumnNames[ii], outputSources[ii]); } } @NotNull @Override public UpdateByOperator.Context makeUpdateContext(final int chunkSize, final int influencerChunkSize) { return new Context(chunkSize); } @Override public void prepareForParallelPopulation(final RowSet changedRows) { if (rowRedirection != null) { assert innerGroupRowSetSource != null; ((WritableSourceWithPrepareForParallelPopulation) innerGroupRowSetSource) .prepareForParallelPopulation(changedRows); if (timestampColumnName != null) { assert innerStartSource != null; ((WritableSourceWithPrepareForParallelPopulation) innerStartSource) .prepareForParallelPopulation(changedRows); assert innerEndSource != null; ((WritableSourceWithPrepareForParallelPopulation) innerEndSource) .prepareForParallelPopulation(changedRows); } } else { ((WritableSourceWithPrepareForParallelPopulation) groupRowSetSource) .prepareForParallelPopulation(changedRows); if (timestampColumnName != null) { ((WritableSourceWithPrepareForParallelPopulation) startSource) .prepareForParallelPopulation(changedRows); ((WritableSourceWithPrepareForParallelPopulation) endSource).prepareForParallelPopulation(changedRows); } } } @Override public void initializeRolling(@NotNull final UpdateByOperator.Context context, @NotNull final RowSet bucketRowSet) { super.initializeRolling(context, bucketRowSet); Context ctx = (Context) context; ctx.groupRowSetSourceOutputValues.fillWithValue(0, ctx.groupRowSetSourceOutputValues.size(), bucketRowSet); } // region Shifts @Override public void applyOutputShift(@NotNull final RowSet subRowSetToShift, final long delta) { ((ObjectSparseArraySource) groupRowSetSource).shift(subRowSetToShift, delta); if (timestampColumnName != null) { 
((LongSparseArraySource) startSource).shift(subRowSetToShift, delta); ((LongSparseArraySource) endSource).shift(subRowSetToShift, delta); } } // endregion Shifts // region clear-output @Override public void clearOutputRows(final RowSet toClear) { // if we are redirected, clear the inner source if (rowRedirection != null) { ChunkUtils.fillWithNullValue(innerGroupRowSetSource, toClear); } else { ChunkUtils.fillWithNullValue(groupRowSetSource, toClear); } } // endregion clear-output @Override public void startTrackingPrev() { groupRowSetSource.startTrackingPrevValues(); if (rowRedirection != null) { assert innerGroupRowSetSource != null; innerGroupRowSetSource.startTrackingPrevValues(); } // If we are time-based, track the start/end sources as well if (timestampColumnName != null) { startSource.startTrackingPrevValues(); endSource.startTrackingPrevValues(); if (rowRedirection != null) { assert innerStartSource != null; innerStartSource.startTrackingPrevValues(); assert innerEndSource != null; innerEndSource.startTrackingPrevValues(); } } for (ColumnSource outputSource : outputSources) { outputSource.startTrackingPrevValues(); } } /** * Get an array of the output column names. * * @return the output column names. */ @NotNull protected String[] getOutputColumnNames() { return outputColumnNames; } /** * Get a map of outputName to output {@link ColumnSource} for this operation. * * @return a map of output column name to output column source */ @NotNull @Override public Map> getOutputColumns() { return outputSourceMap; } /** * Get the names of the input column(s) for this operator. * * @return the names of the input column */ @NotNull @Override protected String[] getInputColumnNames() { return inputColumnNames; } /** * Return whether the operator needs affected and influencer row positions during accumulation. RollingGroup sets * this to {@code true}. 
*/ @Override protected boolean requiresRowPositions() { return true; } /** * Create the modified column set for the input columns of this operator. */ @Override protected void createInputModifiedColumnSet(@NotNull final QueryTable source) { inputModifiedColumnSet = source.newModifiedColumnSet(getAffectingColumnNames()); // inputModifiedColumnSet needs to be set before we can create the transformer. createInputOutputTransformer(); } /** * Create the modified column set for the output columns from this operator. */ @Override protected void createOutputModifiedColumnSet(@NotNull final QueryTable result) { final String[] colNames = getOutputColumnNames(); outputModifiedColumnSet = result.newModifiedColumnSet(colNames); // Create an individual MCS for each output column. outputModifiedColumnSets = new ModifiedColumnSet[colNames.length]; for (int ii = 0; ii < colNames.length; ii++) { outputModifiedColumnSets[ii] = result.newModifiedColumnSet(colNames[ii]); } // outputModifiedColumnSets need to be set before we can create the transformer. createInputOutputTransformer(); } private void createInputOutputTransformer() { if (inputOutputTransformer != null || inputModifiedColumnSet == null || outputModifiedColumnSet == null) { return; } // Create the transformer to map from the input columns to the individual output column MCS. inputOutputTransformer = inputModifiedColumnSet.newTransformer(getInputColumnNames(), outputModifiedColumnSets); } /** * Set the downstream modified column set appropriately for this operator. */ @Override protected void extractDownstreamModifiedColumnSet(@NotNull final TableUpdate upstream, @NotNull final TableUpdate downstream) { if (upstream.added().isNonempty() || upstream.removed().isNonempty()) { downstream.modifiedColumnSet().setAll(getOutputModifiedColumnSet()); return; } if (upstream.modified().isNonempty()) { inputOutputTransformer.transform(upstream.modifiedColumnSet(), downstream.modifiedColumnSet()); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy