io.deephaven.engine.table.impl.updateby.UpdateBy Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of deephaven-engine-table Show documentation
Engine Table: Implementation and closely-coupled utilities
There is a newer version: 0.37.1
Show newest version
package io.deephaven.engine.table.impl.updateby;

import gnu.trove.list.array.TIntArrayList;
import gnu.trove.map.hash.TObjectIntHashMap;
import io.deephaven.api.ColumnName;
import io.deephaven.api.updateby.ColumnUpdateOperation;
import io.deephaven.api.updateby.UpdateByControl;
import io.deephaven.api.updateby.UpdateByOperation;
import io.deephaven.base.log.LogOutput;
import io.deephaven.base.log.LogOutputAppendable;
import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.Chunk;
import io.deephaven.chunk.ResettableWritableObjectChunk;
import io.deephaven.chunk.attributes.Values;
import io.deephaven.configuration.Configuration;
import io.deephaven.engine.context.ExecutionContext;
import io.deephaven.engine.exceptions.UncheckedTableException;
import io.deephaven.engine.liveness.LivenessScopeStack;
import io.deephaven.engine.rowset.*;
import io.deephaven.engine.rowset.chunkattributes.RowKeys;
import io.deephaven.engine.table.*;
import io.deephaven.engine.table.impl.*;
import io.deephaven.engine.table.impl.perf.BasePerformanceEntry;
import io.deephaven.engine.table.impl.perf.PerformanceEntry;
import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder;
import io.deephaven.engine.table.impl.sources.*;
import io.deephaven.engine.table.impl.sources.sparse.SparseConstants;
import io.deephaven.engine.table.impl.util.*;
import io.deephaven.engine.updategraph.*;
import io.deephaven.engine.updategraph.impl.PeriodicUpdateGraph;
import io.deephaven.engine.util.systemicmarking.SystemicObjectTracker;
import io.deephaven.util.SafeCloseable;
import io.deephaven.util.SafeCloseableArray;
import io.deephaven.util.datastructures.linked.IntrusiveDoublyLinkedNode;
import io.deephaven.util.datastructures.linked.IntrusiveDoublyLinkedQueue;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.mutable.MutableObject;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import java.io.IOException;
import java.lang.ref.SoftReference;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicIntegerArray;
import java.util.concurrent.atomic.AtomicReferenceArray;
import java.util.function.Consumer;
import java.util.stream.IntStream;

/**
 * The core of the {@link Table#updateBy(UpdateByControl, Collection, Collection)} operation.
 */
public abstract class UpdateBy {
    /** When caching a column source, how many rows should we process in each parallel batch? (1M default) */
    private static final int PARALLEL_CACHE_BATCH_SIZE =
            Configuration.getInstance().getIntegerWithDefault("UpdateBy.parallelCacheBatchSize", 1 << 20);
    /** When caching a column source, what size chunks should be used to move data to the cache? (64K default) */
    private static final int PARALLEL_CACHE_CHUNK_SIZE =
            Configuration.getInstance().getIntegerWithDefault("UpdateBy.parallelCacheChunkSize", 1 << 16);

    /** When extracting keys from the redirection, what size chunks to use? (2K default) */
    private static final int REDIRECTION_CHUNK_SIZE = 1 << 11;

    /** Input sources may be reused by multiple operators, only store and cache unique ones (post-reinterpret) */
    protected final ColumnSource[] inputSources;
    /** All the windows for this UpdateBy manager */
    protected final UpdateByWindow[] windows;
    /** The source table for the UpdateBy operators */
    protected final QueryTable source;
    /** Helper class for maintaining the RowRedirection when using redirected output sources */
    protected final UpdateByRedirectionHelper redirHelper;
    /** User control to specify UpdateBy parameters */
    protected final UpdateByControl control;
    /** The single timestamp column used by all time-based operators */
    protected final String timestampColumnName;
    /** Whether caching benefits this UpdateBy operation */
    protected final boolean inputCacheNeeded;
    /** Whether caching benefits each input source */
    protected final boolean[] inputSourceCacheNeeded;
    /**
     * References to the dense array sources we are using for the cached sources. It's expected that these will be
     * released and need to be created.
     */
    protected final SoftReference>[] inputSourceCaches;
    /** For easy iteration, create a list of the source indices that need to be cached */
    protected final int[] cacheableSourceIndices;

    /** Store every bucket in this list for processing */
    protected final IntrusiveDoublyLinkedQueue buckets;

    /**
     * Tracks the optional {@link RowRedirection} used by redirected output sources and keeps it in step with upstream
     * {@link TableUpdate updates}.
     * <p>
     * In this class an "outer" key is a row key of the source table, and an "inner" key is the row key that the
     * redirection maps it to.
     */
    static class UpdateByRedirectionHelper {
        /** The redirection in use, or {@code null} when the output sources are not redirected. */
        @Nullable
        private final RowRedirection rowRedirection;
        /** Inner keys released by removed rows and available for re-use; {@code null} unless the redirection is writable. */
        private final WritableRowSet freeRows;
        /**
         * The value reported by {@link #requiredCapacity()}. For a non-writable redirection this is set to
         * {@code size - 1} of the source row set; for a writable redirection it is post-incremented to hand out the
         * next never-used inner key.
         * <p>
         * NOTE(review): those two uses differ by one (largest key vs. next unused key); confirm that is what the
         * consumers of {@link #requiredCapacity()} expect.
         */
        private long maxInnerRowKey;

        /**
         * @param rowRedirection the redirection to maintain, or {@code null} when output sources are not redirected
         */
        private UpdateByRedirectionHelper(@Nullable final RowRedirection rowRedirection) {
            this.rowRedirection = rowRedirection;
            // noinspection resource
            this.freeRows = rowRedirection == null || !rowRedirection.isWritable()
                    ? null
                    : RowSetFactory.empty().toTracking();
            this.maxInnerRowKey = 0;
        }

        /**
         * @return whether a row redirection was supplied
         */
        boolean isRedirected() {
            return rowRedirection != null;
        }

        /**
         * @return the current value of {@link #maxInnerRowKey}
         */
        private long requiredCapacity() {
            return maxInnerRowKey;
        }

        /**
         * Process the upstream {@link TableUpdate update} and return the rowset of dense keys that need cleared for
         * Object array sources.
         * <p>
         * For a writable redirection this also updates the redirection itself: removed outer keys are unmapped and
         * their inner keys recorded as free, shifts are applied, and added outer keys are mapped to free inner keys
         * (lowest first) or, once those run out, to new inner keys.
         *
         * @param upstream the update to process
         * @param sourceRowSet the row set of the source table
         * @return the inner keys that were freed by this update and not re-used by it; owned by the caller
         */
        private WritableRowSet processUpdateForRedirection(
                @NotNull final TableUpdate upstream,
                @NotNull final TrackingRowSet sourceRowSet) {
            assert rowRedirection != null;

            if (!rowRedirection.isWritable()) {
                // The inner row key space is always a flattened view of the outer row key space in this case.
                maxInnerRowKey = sourceRowSet.size() - 1;
                // Positions (in the previous row set) of the removed rows, less the positions (in the current row
                // set) that are occupied by added rows.
                final WritableRowSet denseRowsToClear = sourceRowSet.prev().invert(upstream.removed());
                if (denseRowsToClear.isNonempty() && upstream.added().isNonempty()) {
                    try (final RowSet invertedAdds = sourceRowSet.invert(upstream.added())) {
                        denseRowsToClear.remove(invertedAdds);
                    }
                }
                return denseRowsToClear;
            }

            final WritableRowRedirection writableRowRedirection = rowRedirection.writableCast();
            final WritableRowSet toClear;

            if (upstream.removed().isNonempty()) {
                final RowSetBuilderRandom freeBuilder = RowSetFactory.builderRandom();
                upstream.removed().forAllRowKeys(key -> freeBuilder.addKey(writableRowRedirection.remove(key)));
                // store all freed rows as the candidate toClear set
                toClear = freeBuilder.build();
                freeRows.insert(toClear);
            } else {
                toClear = RowSetFactory.empty();
            }

            if (upstream.shifted().nonempty()) {
                try (final WritableRowSet prevRowSetLessRemoves = sourceRowSet.copyPrev()) {
                    prevRowSetLessRemoves.remove(upstream.removed());
                    writableRowRedirection.applyShift(prevRowSetLessRemoves, upstream.shifted());
                }
            }

            if (upstream.added().isNonempty()) {
                boolean unusedFreeRowsRemain;
                long firstUnusedFreeKey = -1;
                // Close the iterator once the adds are mapped, and before freeRows is modified below.
                try (final WritableRowSet.Iterator freeIt = freeRows.iterator()) {
                    upstream.added().forAllRowKeys(outerKey -> {
                        final long innerKey = freeIt.hasNext() ? freeIt.nextLong() : maxInnerRowKey++;
                        writableRowRedirection.put(outerKey, innerKey);
                    });
                    unusedFreeRowsRemain = freeIt.hasNext();
                    if (unusedFreeRowsRemain) {
                        firstUnusedFreeKey = freeIt.nextLong();
                    }
                }
                if (unusedFreeRowsRemain) {
                    // Every free key below the first unused one was just handed to an added row.
                    try (final RowSet added = freeRows.subSetByKeyRange(0, firstUnusedFreeKey - 1)) {
                        toClear.remove(added);
                        freeRows.remove(added);
                    }
                } else {
                    // All free keys were consumed by the adds.
                    toClear.clear();
                    freeRows.clear();
                }
            }
            return toClear;
        }

        /**
         * Translate outer keys to inner keys through the redirection, reading it in chunks of at most
         * {@link #REDIRECTION_CHUNK_SIZE} keys.
         *
         * @param outerKeys the outer keys to translate
         * @return a new row set of the corresponding inner keys; owned by the caller
         */
        private RowSet getInnerKeys(final RowSet outerKeys) {
            assert (rowRedirection != null);
            if (outerKeys.isEmpty()) {
                // Nothing to translate; avoid allocating a zero-sized get context.
                return RowSetFactory.empty();
            }
            final RowSetBuilderRandom builder = RowSetFactory.builderRandom();
            final int chunkSize = Math.min(outerKeys.intSize(), REDIRECTION_CHUNK_SIZE);
            try (final RowSequence.Iterator it = outerKeys.getRowSequenceIterator();
                    ChunkSource.GetContext getContext = rowRedirection.makeGetContext(chunkSize)) {
                while (it.hasMore()) {
                    final RowSequence rs = it.getNextRowSequenceWithLength(chunkSize);
                    builder.addRowKeysChunk(rowRedirection.getChunk(getContext, rs).asLongChunk());
                }
            }
            return builder.build();
        }
    }

    /**
     * Initialize the state shared by all UpdateBy implementations.
     *
     * @param source the source table for the UpdateBy operators
     * @param windows all the windows for this UpdateBy manager
     * @param inputSources the unique input sources used by the operators
     * @param timestampColumnName the timestamp column used by time-based operators, may be {@code null}
     * @param rowRedirection the redirection for redirected output sources, or {@code null} when not redirected
     * @param control the user control specifying UpdateBy parameters
     */
    protected UpdateBy(
            @NotNull final QueryTable source,
            @NotNull final UpdateByWindow[] windows,
            @NotNull final ColumnSource[] inputSources,
            @Nullable String timestampColumnName,
            @Nullable final RowRedirection rowRedirection,
            @NotNull final UpdateByControl control) {

        this.source = source;
        this.windows = windows;
        this.inputSources = inputSources;
        this.timestampColumnName = timestampColumnName;
        this.redirHelper = new UpdateByRedirectionHelper(rowRedirection);
        this.control = control;

        // A source is cached exactly when it does not provide fill-unordered support.
        cacheableSourceIndices = IntStream.range(0, inputSources.length)
                .filter(ii -> !FillUnordered.providesFillUnordered(inputSources[ii]))
                .toArray();
        // Record the same decision per source with an explicit loop rather than a side effect inside the stream.
        this.inputSourceCacheNeeded = new boolean[inputSources.length];
        for (final int srcIdx : cacheableSourceIndices) {
            inputSourceCacheNeeded[srcIdx] = true;
        }
        inputCacheNeeded = cacheableSourceIndices.length > 0;

        // noinspection unchecked
        inputSourceCaches = new SoftReference[inputSources.length];

        buckets =
                new IntrusiveDoublyLinkedQueue<>(IntrusiveDoublyLinkedNode.Adapter.getInstance());
    }


    /**
     * Overview of work performed by {@link PhasedUpdateProcessor}:
     * <ol>
     * <li>Create {@code changedRows}, the set of rows for the output sources that are affected by shifts or
     * removes</li>
     * <li>Compute a rowset for each cacheable input source identifying which rows will be needed for processing</li>
     * <li>Compute the modified rowset of output column sources and call {@code prepareForParallelPopulation()}</li>
     * <li>When {@code prepareForParallelPopulation()} is complete, apply upstream shifts to the output sources</li>
     * <li>Process each window and operator serially
     * <ul>
     * <li>Pre-create window information for windowed operators (push/pop counts)</li>
     * <li>Cache the input sources that are needed for each window operator (in parallel by chunk of rows)</li>
     * <li>When caching is complete, process the window operator (in parallel by bucket)</li>
     * <li>When all buckets are processed, release the input source caches that will not be re-used by later
     * operators</li>
     * </ul>
     * </li>
     * <li>When all windows are processed, create the downstream update and notify</li>
     * <li>Release resources</li>
     * </ol>
     */
    class PhasedUpdateProcessor implements LogOutputAppendable {
        /** The upstream update being processed */
        final TableUpdate upstream;
        /** Whether this is the initial computation (everything is treated as dirty) */
        final boolean initialStep;
        /** Completed (with {@code null}) once the whole processing chain has finished */
        final CompletableFuture<Void> waitForResult;
        /** The buckets that reported themselves dirty when this processor was created */
        final UpdateByBucketHelper[] dirtyBuckets;
        /** One bit per window, set when the window needs computation this cycle */
        final BitSet dirtyWindows;
        /** Per window, one bit per operator that needs computation; an entry stays {@code null} for a clean window */
        final BitSet[] dirtyWindowOperators;
        /** The active set of sources to use for processing, each source may be cached or original */
        final ColumnSource[] maybeCachedInputSources;
        /** For cacheable sources, the minimal rowset to cache (union of bucket influencer rows) */
        final AtomicReferenceArray<WritableRowSet> inputSourceRowSets;
        /** For cacheable sources, track how many windows require this source */
        final AtomicIntegerArray inputSourceReferenceCounts;
        /** Scheduler used to run the phases of this update */
        final JobScheduler jobScheduler;
        /** Execution context supplied to every scheduled job */
        final ExecutionContext executionContext;

        /**
         * These rows will be changed because of shifts or removes and will need to be included in
         * {@code prepareForParallelPopulation()} calls
         */
        WritableRowSet changedRows;

        /**
         * These rows will be unused after this cycle and Object columns should NULL these keys
         */
        WritableRowSet toClear;

        /**
         * Capture the work to be done for one update: which buckets, windows and operators are dirty, which input
         * sources may be replaced by caches, and which scheduler and execution context will run the work.
         *
         * @param upstream the update to process
         * @param initialStep {@code true} when performing the initial computation, in which case every window and
         *        operator is marked dirty
         */
        PhasedUpdateProcessor(TableUpdate upstream, boolean initialStep) {
            this.upstream = upstream;
            this.initialStep = initialStep;

            waitForResult = new CompletableFuture<>();

            // Snapshot the buckets that have work to do this cycle.
            dirtyBuckets = buckets.stream().filter(UpdateByBucketHelper::isDirty).toArray(UpdateByBucketHelper[]::new);
            dirtyWindows = new BitSet(windows.length);
            dirtyWindowOperators = new BitSet[windows.length];

            if (!inputCacheNeeded) {
                // Nothing is cacheable, use the original sources directly.
                maybeCachedInputSources = inputSources;
                inputSourceRowSets = null;
                inputSourceReferenceCounts = null;
            } else {
                final int sourceCount = inputSources.length;
                maybeCachedInputSources = new ColumnSource[sourceCount];
                inputSourceRowSets = new AtomicReferenceArray<>(sourceCount);
                inputSourceReferenceCounts = new AtomicIntegerArray(sourceCount);

                // Slots for cacheable sources stay null; the others use the original source.
                for (int srcIdx = 0; srcIdx < sourceCount; srcIdx++) {
                    if (!inputSourceCacheNeeded[srcIdx]) {
                        maybeCachedInputSources[srcIdx] = inputSources[srcIdx];
                    }
                }
            }

            if (initialStep) {
                // Every window, and every operator in it, must be computed.
                dirtyWindows.set(0, windows.length);
                for (int winIdx = 0; winIdx < windows.length; winIdx++) {
                    final int operatorCount = windows[winIdx].operators.length;
                    final BitSet allOperators = new BitSet(operatorCount);
                    allOperators.set(0, operatorCount);
                    dirtyWindowOperators[winIdx] = allOperators;
                }

                // Select the scheduler for the parallel tasks that follow.
                jobScheduler = ExecutionContext.getContext().getOperationInitializer().canParallelize()
                        ? new OperationInitializerJobScheduler()
                        : new ImmediateJobScheduler();
                executionContext = ExecutionContext.newBuilder()
                        .markSystemic().build();
            } else {
                // Merge the dirty operators of every dirty bucket, window by window.
                for (int winIdx = 0; winIdx < windows.length; winIdx++) {
                    for (final UpdateByBucketHelper bucket : dirtyBuckets) {
                        final UpdateByWindow.UpdateByWindowBucketContext bucketCtx = bucket.windowContexts[winIdx];
                        if (bucketCtx.isDirty) {
                            BitSet windowOperators = dirtyWindowOperators[winIdx];
                            if (windowOperators == null) {
                                dirtyWindows.set(winIdx);
                                windowOperators = new BitSet(windows[winIdx].operators.length);
                                dirtyWindowOperators[winIdx] = windowOperators;
                            }
                            final int operatorCount = windows[winIdx].operators.length;
                            windowOperators.or(bucketCtx.dirtyOperators);
                            if (windowOperators.cardinality() == operatorCount) {
                                // Every operator is already dirty, the remaining buckets cannot add anything.
                                break;
                            }
                        }
                    }
                }

                // Select the scheduler for the parallel tasks that follow.
                jobScheduler = source.getUpdateGraph().parallelismFactor() > 1
                        ? new UpdateGraphJobScheduler(source.getUpdateGraph())
                        : new ImmediateJobScheduler();
                executionContext = ExecutionContext.newBuilder()
                        .setUpdateGraph(result().getUpdateGraph())
                        .markSystemic().build();
            }
        }

        // region helper-functions
        /**
         * Append this processor's fixed identifying name to {@code logOutput}.
         */
        @Override
        public LogOutput append(LogOutput logOutput) {
            final String processorName = "UpdateBy.PhasedUpdateProcessor";
            return logOutput.append(processorName);
        }

        /**
         * Wrap a string so that it can be supplied where a {@link LogOutputAppendable} is required.
         */
        private LogOutputAppendable stringToAppendable(@NotNull final String toAppend) {
            return out -> {
                return out.append(toAppend);
            };
        }

        /**
         * Create an appendable that writes {@code string}, a {@code '-'} separator, then {@code index}.
         */
        private LogOutputAppendable stringAndIndexToAppendable(@NotNull final String string, final int index) {
            return out -> {
                final LogOutput withLabel = out.append(string);
                return withLabel.append('-').append(index);
            };
        }

        /**
         * Create an appendable that writes {@code prefix} immediately followed by {@code toAppend}.
         */
        private LogOutputAppendable chainAppendables(
                @NotNull final LogOutputAppendable prefix,
                @NotNull final LogOutputAppendable toAppend) {
            return out -> {
                final LogOutput withPrefix = out.append(prefix);
                return withPrefix.append(toAppend);
            };
        }
        // endregion helper-functions

        /**
         * Process the {@link TableUpdate update} supplied at construction. The row sets {@link #toClear} and
         * {@link #changedRows} are computed on the calling thread; the remaining phases are chained through the
         * {@link JobScheduler}, each one starting the next when it completes.
         *
         * @return a future that is completed once the final phase has finished
         */
        public Future processUpdate() {
            if (!redirHelper.isRedirected()) {
                // Rows present in the previous row set but not the current one must be cleared in Object columns
                // (the clearing itself happens later).
                toClear = source.getRowSet().copyPrev();
                toClear.remove(source.getRowSet());

                // Sparse array output sources need the destination of every shifted row included in the parallel
                // update preparations.
                final RowSetShiftData shifts = upstream.shifted();
                if (shifts.nonempty()) {
                    try (final RowSequence.Iterator prevIt = source.getRowSet().prev().getRowSequenceIterator()) {
                        // The shift ranges are visited in ascending order, so a sequential builder can be used.
                        final RowSetBuilderSequential shiftedBuilder = RowSetFactory.builderSequential();
                        final int shiftCount = shifts.size();

                        for (int shiftIdx = 0; shiftIdx < shiftCount; shiftIdx++) {
                            final long rangeBegin = shifts.getBeginRange(shiftIdx);
                            final long rangeEnd = shifts.getEndRange(shiftIdx);
                            final long shiftDelta = shifts.getShiftDelta(shiftIdx);

                            prevIt.advance(rangeBegin);
                            shiftedBuilder.appendRowSequenceWithOffset(
                                    prevIt.getNextRowSequenceThrough(rangeEnd), shiftDelta);
                        }
                        changedRows = shiftedBuilder.build();
                    }
                } else {
                    changedRows = RowSetFactory.empty();
                }
                // The cleared rows also take part in the calls to `prepareForParallelPopulation()`.
                changedRows.insert(toClear);
            } else {
                // The helper updates the redirection and reports the rows to clear from the Object array output
                // sources.
                toClear = redirHelper.processUpdateForRedirection(upstream, source.getRowSet());
                changedRows = RowSetFactory.empty();

                // Clear them now and let the operators set their own prev states.
                if (!initialStep && toClear.isNonempty()) {
                    forAllOperators(op -> op.clearOutputRows(toClear));
                }
            }

            // From here on the work is driven by the scheduler; every phase hands off to the next one through its
            // completion callback.
            final Runnable notifyThenSignal = () -> cleanUpAndNotify(
                    () -> {
                        // signal to the main task that we have completed our work
                        waitForResult.complete(null);
                    });
            final Runnable windowsThenNotify = () -> processWindows(notifyThenSignal);
            final Runnable prepareThenWindows = () -> prepareForParallelPopulation(windowsThenNotify);
            computeCachedColumnRowSets(prepareThenWindows);

            return waitForResult;
        }

        /**
         * Accumulate in parallel the dirty bucket RowSets for the cacheable input sources. Calls
         * {@code onComputeComplete} when the work is complete.
         * <p>
         * For every cacheable source this fills {@link #inputSourceRowSets} with the rows to cache and
         * {@link #inputSourceReferenceCounts} with the number of operators that use the source. On the initial step
         * the whole source row set and every operator are considered; otherwise only dirty windows, dirty buckets
         * and dirty operators are.
         *
         * @param onComputeComplete invoked directly when there is nothing to schedule, otherwise handed to the
         *        scheduler as the completion callback
         */
        private void computeCachedColumnRowSets(final Runnable onComputeComplete) {
            // We have nothing to cache, so we can exit early.
            if (!inputCacheNeeded || dirtyWindows.isEmpty()) {
                onComputeComplete.run();
                return;
            }

            // Initially everything is dirty so cache everything.
            if (initialStep) {
                for (int srcIdx : cacheableSourceIndices) {
                    if (inputSourceCacheNeeded[srcIdx]) {
                        // create a RowSet to be used by `InverseWrappedRowSetRowRedirection`
                        inputSourceRowSets.set(srcIdx, source.getRowSet().copy());

                        // record how many operators require this input source
                        int useCount = 0;
                        for (UpdateByWindow win : windows) {
                            for (int winOpIdx = 0; winOpIdx < win.operators.length; winOpIdx++) {
                                if (win.operatorUsesSource(winOpIdx, srcIdx)) {
                                    useCount++;
                                }
                            }
                        }
                        inputSourceReferenceCounts.set(srcIdx, useCount);
                    }
                }
                // This path runs entirely on the calling thread; no job is scheduled.
                onComputeComplete.run();
                return;
            }

            final int[] dirtyWindowIndices = dirtyWindows.stream().toArray();

            // One iteration per cacheable source; `idx` indexes cacheableSourceIndices, not inputSources.
            jobScheduler.iterateParallel(executionContext,
                    chainAppendables(this, stringToAppendable("-computeCachedColumnRowSets")),
                    JobScheduler.DEFAULT_CONTEXT_FACTORY, 0, cacheableSourceIndices.length,
                    (context, idx, nec) -> {
                        final int srcIdx = cacheableSourceIndices[idx];

                        int useCount = 0;
                        // If any of the dirty operators use this source, then increment the use count
                        for (int winIdx : dirtyWindowIndices) {
                            UpdateByWindow win = windows[winIdx];
                            // combine the row sets from the dirty windows
                            // NOTE(review): influencer rows are merged for every dirty window, whether or not one
                            // of its operators uses this source; only useCount below checks usage. Confirm intended.
                            for (UpdateByBucketHelper bucket : dirtyBuckets) {
                                if (!bucket.windowContexts[winIdx].isDirty) {
                                    continue;
                                }

                                UpdateByWindow.UpdateByWindowBucketContext winBucketCtx = bucket.windowContexts[winIdx];
                                WritableRowSet rows = inputSourceRowSets.get(srcIdx);
                                if (rows == null) {
                                    // No row set published yet for this source: try to publish a copy of this
                                    // bucket's influencer rows. If the slot was filled in the meantime, discard
                                    // the copy and merge into the published row set instead.
                                    final WritableRowSet influencerCopy =
                                            win.getInfluencerRows(winBucketCtx).copy();
                                    if (!inputSourceRowSets.compareAndSet(srcIdx, null, influencerCopy)) {
                                        influencerCopy.close();
                                        rows = inputSourceRowSets.get(srcIdx);
                                    }
                                }
                                // `rows` is still null here when the copy above was published successfully, in
                                // which case there is nothing further to insert for this bucket.
                                if (rows != null) {
                                    // if not null, then insert this window's rowset
                                    // noinspection SynchronizationOnLocalVariableOrMethodParameter
                                    synchronized (rows) {
                                        rows.insert(win.getInfluencerRows(winBucketCtx));
                                    }
                                }
                            }

                            // Count only the operators that both use this source and are dirty this cycle.
                            for (int winOpIdx = 0; winOpIdx < win.operators.length; winOpIdx++) {
                                if (win.operatorUsesSource(winOpIdx, srcIdx)
                                        && dirtyWindowOperators[winIdx].get(winOpIdx)) {
                                    useCount++;
                                }
                            }
                            // useCount accumulates across windows, so the value stored after the last dirty
                            // window is the total for this source.
                            inputSourceReferenceCounts.set(srcIdx, useCount);
                        }
                    }, onComputeComplete, this::onError);
        }

        /**
         * Prepare the operator output columns of every window for the parallel work to follow, then (for
         * non-redirected output sources) apply the upstream shifts to them. Calls
         * {@code onParallelPopulationComplete} when the work is complete.
         */
        private void prepareForParallelPopulation(
                final Runnable onParallelPopulationComplete) {
            jobScheduler.iterateParallel(executionContext,
                    chainAppendables(this, stringToAppendable("-prepareForParallelPopulation")),
                    JobScheduler.DEFAULT_CONTEXT_FACTORY, 0,
                    windows.length,
                    (context, winIdx, nestedErrorConsumer) -> {
                        final UpdateByWindow window = windows[winIdx];
                        final boolean redirected = redirHelper.isRedirected();

                        if (!initialStep) {
                            // Start from the rows changed by shifts/removes (empty when using redirection) and add
                            // the affected rows of each dirty bucket of this window.
                            try (final WritableRowSet windowRowSet = changedRows.copy()) {
                                for (final UpdateByBucketHelper bucket : dirtyBuckets) {
                                    final UpdateByWindow.UpdateByWindowBucketContext bucketCtx =
                                            bucket.windowContexts[winIdx];
                                    if (window.isWindowBucketDirty(bucketCtx)) {
                                        windowRowSet.insert(window.getAffectedRows(bucketCtx));
                                    }
                                }
                                if (redirected) {
                                    // Redirected output sources are prepared in the inner key space.
                                    try (final RowSet innerKeys = redirHelper.getInnerKeys(windowRowSet)) {
                                        window.prepareForParallelPopulation(innerKeys);
                                    }
                                } else {
                                    window.prepareForParallelPopulation(windowRowSet);
                                }
                            }
                        } else {
                            // On the initial step the entire set of rows is prepared.
                            try (final RowSet allRows = redirected
                                    ? RowSetFactory.flat(redirHelper.requiredCapacity())
                                    : source.getRowSet().copy()) {
                                window.prepareForParallelPopulation(allRows);
                            }
                        }

                        if (!redirected && upstream.shifted().nonempty()) {
                            // Shift the non-redirected output sources now, after parallelPopulation.
                            upstream.shifted().apply((begin, end, delta) -> {
                                try (final RowSet shiftedRange =
                                        source.getRowSet().prev().subSetByKeyRange(begin, end)) {
                                    for (UpdateByOperator operator : window.getOperators()) {
                                        operator.applyOutputShift(shiftedRange, delta);
                                    }
                                }
                            });
                        }
                    }, onParallelPopulationComplete, this::onError);
        }

        /**
         * Process all dirty {@code windows} one after another (to minimize cached column memory usage). For each
         * window, every dirty bucket is given its input sources and prepared (e.g. push/pop chunks for Rolling
         * operators), then {@link #processWindowOperators} is called. Once the operators have been processed, the
         * window's bucket resources are released before moving on to the next window.
         *
         * @param onWindowsComplete invoked when all dirty windows have been processed, or immediately when there are
         *        none
         */
        private void processWindows(final Runnable onWindowsComplete) {
            if (dirtyWindows.isEmpty()) {
                onWindowsComplete.run();
                return;
            }

            final int[] dirtyWindowIndices = dirtyWindows.stream().toArray();

            jobScheduler.iterateSerial(executionContext,
                    chainAppendables(this, stringToAppendable("-processWindows")),
                    JobScheduler.DEFAULT_CONTEXT_FACTORY, 0,
                    dirtyWindowIndices.length,
                    (context, idx, nestedErrorConsumer, windowComplete) -> {
                        final int winIdx = dirtyWindowIndices[idx];
                        final UpdateByWindow window = windows[winIdx];

                        int largestAffectedChunk = 0;
                        int largestInfluencerChunk = 0;

                        for (final UpdateByBucketHelper bucket : dirtyBuckets) {
                            final UpdateByWindow.UpdateByWindowBucketContext bucketCtx =
                                    bucket.windowContexts[winIdx];
                            if (!bucketCtx.isDirty) {
                                continue;
                            }

                            // Hand the (maybe cached) input sources to this bucket.
                            window.assignInputSources(bucketCtx, maybeCachedInputSources);

                            // Allocate the window context resources for this bucket; rolling ops pre-compute their
                            // push/pop chunks here.
                            window.prepareWindowBucket(bucketCtx);

                            // Track the largest chunk sizes needed across the buckets of this window.
                            largestAffectedChunk = Math.max(largestAffectedChunk, bucketCtx.workingChunkSize);

                            final int bucketInfluencerChunk;
                            if (bucketCtx instanceof UpdateByWindowRollingBase.UpdateByWindowRollingBucketContext) {
                                bucketInfluencerChunk =
                                        ((UpdateByWindowRollingBase.UpdateByWindowRollingBucketContext) bucketCtx).maxGetContextSize;
                            } else {
                                bucketInfluencerChunk = bucketCtx.workingChunkSize;
                            }
                            largestInfluencerChunk = Math.max(largestInfluencerChunk, bucketInfluencerChunk);
                        }

                        // Run every operator of this window, then release what was allocated above.
                        final Runnable releaseWindow = () -> {
                            for (final UpdateByBucketHelper bucket : dirtyBuckets) {
                                if (bucket.windowContexts[winIdx].isDirty) {
                                    windows[winIdx].finalizeWindowBucket(bucket.windowContexts[winIdx]);
                                }
                            }
                            windowComplete.run();
                        };
                        processWindowOperators(winIdx, largestAffectedChunk, largestInfluencerChunk,
                                releaseWindow, nestedErrorConsumer);
                    }, onWindowsComplete, this::onError);
        }

        /**
         * Run the dirty operators of {@code windows[winIdx]} one operator set at a time. The dirty operators are
         * ordered by their first (and, when present, second) input source slot and then partitioned into runs of
         * adjacent operators whose input source slots are identical, so every set can share the same cached inputs.
         * 
         * For each set, the input sources are cached, the set is processed, and the cached sources that are no longer
         * needed are released before the next set starts.
         *
         * @param winIdx index of the window whose operators are processed
         * @param maxAffectedChunkSize forwarded unchanged to {@code processWindowOperatorSet}
         * @param maxInfluencerChunkSize forwarded unchanged to {@code processWindowOperatorSet}
         * @param onProcessWindowOperatorsComplete handed to the scheduler as the completion callback
         * @param onProcessWindowOperatorsError handed to the scheduler as the error callback
         */
        private void processWindowOperators(
                final int winIdx,
                final int maxAffectedChunkSize,
                final int maxInfluencerChunkSize,
                final Runnable onProcessWindowOperatorsComplete,
                final Consumer onProcessWindowOperatorsError) {
            final UpdateByWindow win = windows[winIdx];
            final int[][] slotsByOperator = win.operatorInputSourceSlots;

            // Order the dirty operators so that operators reading the same inputs end up next to each other; this
            // raises the chance that input caches can be dropped early.
            final Integer[] sortedOps = ArrayUtils.toObject(dirtyWindowOperators[winIdx].stream().toArray());
            Arrays.sort(sortedOps, Comparator
                    .<Integer>comparingInt(op -> slotsByOperator[op][0])
                    .thenComparingInt(op -> {
                        final int[] slots = slotsByOperator[op];
                        return slots.length < 2 ? -1 : slots[1];
                    }));

            // Split the ordered operators into runs; every run must have exactly the same input source slots.
            final List<int[]> operatorSets = new ArrayList<>(sortedOps.length);
            final TIntArrayList currentSet = new TIntArrayList(sortedOps.length);

            int previousOp = sortedOps[0];
            currentSet.add(previousOp);
            for (int pos = 1; pos < sortedOps.length; pos++) {
                final int currentOp = sortedOps[pos];
                final boolean sameInputs = Arrays.equals(slotsByOperator[currentOp], slotsByOperator[previousOp]);
                if (!sameInputs) {
                    // the run ended, emit it and start collecting the next one
                    operatorSets.add(currentSet.toArray());
                    currentSet.clear(sortedOps.length);
                }
                currentSet.add(currentOp);
                previousOp = currentOp;
            }
            operatorSets.add(currentSet.toArray());

            // Work through the operator sets of this window one after the other.
            jobScheduler.iterateSerial(executionContext,
                    chainAppendables(this, stringAndIndexToAppendable("-processWindowOperators", winIdx)),
                    JobScheduler.DEFAULT_CONTEXT_FACTORY, 0,
                    operatorSets.size(),
                    (context, setIdx, nestedErrorConsumer, opSetComplete) -> {
                        final int[] opIndices = operatorSets.get(setIdx);

                        // every operator of the set reads the same sources, so the first one is representative
                        final int[] srcIndices = win.operatorInputSourceSlots[opIndices[0]];

                        // last step: drop cached sources nobody needs anymore, then signal the scheduler
                        final Runnable releaseAndContinue = () -> {
                            releaseInputSources(srcIndices);
                            opSetComplete.run();
                        };

                        // middle step: run the operators of this set once their inputs are cached
                        final Runnable processAfterCaching = () -> processWindowOperatorSet(winIdx, opIndices,
                                srcIndices, maxAffectedChunkSize, maxInfluencerChunkSize, releaseAndContinue,
                                nestedErrorConsumer);

                        // first step: cache the inputs
                        cacheOperatorInputSources(winIdx, srcIndices, processAfterCaching, nestedErrorConsumer);
                    }, onProcessWindowOperatorsComplete, onProcessWindowOperatorsError);
        }

        /**
         * Build the cached input sources for the given source slots, one scheduler task per slot. When input caching is
         * not needed for this update, {@code onCachingComplete} is run directly and nothing is scheduled.
         *
         * @param winIdx index of the window on whose behalf the sources are cached (used for the job description)
         * @param srcIndices the input source slots to cache
         * @param onCachingComplete invoked when caching is finished (or skipped)
         * @param onCachingError handed to the scheduler as the error callback
         */
        private void cacheOperatorInputSources(
                final int winIdx,
                final int[] srcIndices,
                final Runnable onCachingComplete,
                final Consumer onCachingError) {
            if (inputCacheNeeded) {
                jobScheduler.iterateParallel(executionContext,
                        chainAppendables(this, stringAndIndexToAppendable("-cacheOperatorInputSources", winIdx)),
                        JobScheduler.DEFAULT_CONTEXT_FACTORY, 0, srcIndices.length,
                        (context, slot, nestedErrorConsumer, sourceComplete) -> {
                            final int srcIdx = srcIndices[slot];
                            createCachedColumnSource(srcIdx, sourceComplete, nestedErrorConsumer);
                        },
                        onCachingComplete,
                        onCachingError);
            } else {
                // nothing to cache, carry on right away
                onCachingComplete.run();
            }
        }

        /**
         * Create a new input source cache and populate the required rows in parallel. Calls {@code onSourceComplete}
         * when the work is complete.
         * <p>
         * If the source is already cached, or no row set is registered for it, {@code onSourceComplete} is run
         * immediately and nothing is scheduled.
         *
         * @param srcIdx the input source slot to cache
         * @param onSourceComplete invoked after the cached source has been published to
         *        {@code maybeCachedInputSources}, or immediately when there is nothing to do
         * @param onSourceError handed to the scheduler as the error callback
         */
        private void createCachedColumnSource(
                int srcIdx,
                final Runnable onSourceComplete,
                final Consumer onSourceError) {
            // the rows of this source that have to be available in the cache
            final WritableRowSet inputRowSet = inputSourceRowSets.get(srcIdx);

            if (maybeCachedInputSources[srcIdx] != null || inputRowSet == null) {
                // already cached from another operator (or caching not needed)
                onSourceComplete.run();
                return;
            }

            final ColumnSource inputSource = inputSources[srcIdx];

            // re-use the dense column cache if it still exists (it is only softly referenced, so it may be gone)
            WritableColumnSource innerSource;
            if (inputSourceCaches[srcIdx] == null || (innerSource = inputSourceCaches[srcIdx].get()) == null) {
                // create a new dense cache
                innerSource = ArrayBackedColumnSource.getMemoryColumnSource(inputSource.getType(),
                        inputSource.getComponentType());
                inputSourceCaches[srcIdx] = new SoftReference<>(innerSource);
            }
            // one dense slot per row that will be cached
            innerSource.ensureCapacity(inputRowSet.size());

            // there will be no updates to this cached column source, so use a simple redirection
            final RowRedirection rowRedirection = new InverseWrappedRowSetRowRedirection(inputRowSet);
            final WritableColumnSource outputSource =
                    WritableRedirectedColumnSource.maybeRedirect(rowRedirection, innerSource, 0);

            // how many batches do we need? (ceiling division of the row count by the batch size)
            final int taskCount =
                    Math.toIntExact((inputRowSet.size() + PARALLEL_CACHE_BATCH_SIZE - 1) / PARALLEL_CACHE_BATCH_SIZE);

            // Per-thread state for the copy tasks: an iterator over the rows to cache plus the contexts needed to read
            // from the original source and to write into the cache. All three are closed together.
            final class BatchThreadContext implements JobScheduler.JobThreadContext {
                final RowSequence.Iterator rsIt = inputRowSet.getRowSequenceIterator();
                final ChunkSink.FillFromContext ffc =
                        outputSource.makeFillFromContext(PARALLEL_CACHE_CHUNK_SIZE);
                final ChunkSource.GetContext gc =
                        inputSource.makeGetContext(PARALLEL_CACHE_CHUNK_SIZE);

                @Override
                public void close() {
                    SafeCloseable.closeAll(rsIt, ffc, gc);
                }
            }

            jobScheduler.iterateParallel(executionContext,
                    chainAppendables(this, stringToAppendable("-createCachedColumnSource")),
                    BatchThreadContext::new, 0, taskCount,
                    (ctx, idx, nec) -> {
                        // advance to the first key of this block; the key is looked up by its position in the row set
                        ctx.rsIt.advance(inputRowSet.get((long) idx * PARALLEL_CACHE_BATCH_SIZE));
                        int remaining = PARALLEL_CACHE_BATCH_SIZE;
                        while (ctx.rsIt.hasMore() && remaining > 0) {
                            // copy at most one chunk's worth of rows from the original source into the cache
                            final RowSequence chunkOk = ctx.rsIt
                                    .getNextRowSequenceWithLength(Math.min(remaining, PARALLEL_CACHE_CHUNK_SIZE));
                            final Chunk values = inputSource.getChunk(ctx.gc, chunkOk);
                            outputSource.fillFromChunk(ctx.ffc, values, chunkOk);

                            // reduce by the attempted stride, if this is the final block the iterator will
                            // be exhausted and hasMore() will return false
                            remaining -= PARALLEL_CACHE_CHUNK_SIZE;
                        }
                    }, () -> {
                        // assign this now; the cache is only published after every batch has been copied
                        maybeCachedInputSources[srcIdx] = outputSource;
                        onSourceComplete.run();
                    }, onSourceError);
        }

        /**
         * Process a subset of operators from {@code windows[winIdx]} in parallel by bucket. Calls
         * {@code onProcessWindowOperatorSetComplete} when the work is complete
         *
         * @param winIdx index of the window being processed
         * @param opIndices indices (within the window) of the operators in this set
         * @param srcIndices the input source slots shared by every operator in this set
         * @param maxAffectedChunkSize first argument passed to each operator's {@code makeUpdateContext}
         * @param maxInfluencerChunkSize second argument passed to each operator's {@code makeUpdateContext}; also the
         *        size of the get contexts created on the input sources
         * @param onProcessWindowOperatorSetComplete handed to the scheduler as the completion callback
         * @param onProcessWindowOperatorSetError handed to the scheduler as the error callback
         */
        private void processWindowOperatorSet(final int winIdx,
                final int[] opIndices,
                final int[] srcIndices,
                final int maxAffectedChunkSize,
                final int maxInfluencerChunkSize,
                final Runnable onProcessWindowOperatorSetComplete,
                final Consumer onProcessWindowOperatorSetError) {
            // Per-thread working state: one update context per operator of the set and one get context per input
            // source. The chunk array is only allocated here; it is passed on to the window for its own use.
            final class OperatorThreadContext implements JobScheduler.JobThreadContext {
                final Chunk[] chunkArr;
                final ChunkSource.GetContext[] chunkContexts;
                final UpdateByOperator.Context[] winOpContexts;

                OperatorThreadContext() {
                    winOpContexts = new UpdateByOperator.Context[opIndices.length];

                    for (int ii = 0; ii < opIndices.length; ii++) {
                        final int opIdx = opIndices[ii];
                        winOpContexts[ii] = windows[winIdx].operators[opIdx].makeUpdateContext(maxAffectedChunkSize,
                                maxInfluencerChunkSize);
                    }

                    // noinspection unchecked
                    chunkArr = new Chunk[srcIndices.length];
                    chunkContexts = new ChunkSource.GetContext[srcIndices.length];

                    // All operators in this bin have identical input source sets
                    for (int ii = 0; ii < srcIndices.length; ii++) {
                        int srcIdx = srcIndices[ii];
                        chunkContexts[ii] = maybeCachedInputSources[srcIdx].makeGetContext(maxInfluencerChunkSize);
                    }
                }

                @Override
                public void close() {
                    SafeCloseableArray.close(winOpContexts);
                    SafeCloseableArray.close(chunkContexts);
                }
            }

            // one task per dirty bucket; buckets whose context for this window is not dirty are skipped
            jobScheduler.iterateParallel(executionContext,
                    chainAppendables(this, stringAndIndexToAppendable("-processWindowBucketOperators", winIdx)),
                    OperatorThreadContext::new,
                    0, dirtyBuckets.length,
                    (context, bucketIdx, nec) -> {
                        UpdateByBucketHelper bucket = dirtyBuckets[bucketIdx];
                        if (bucket.windowContexts[winIdx].isDirty) {
                            windows[winIdx].processWindowBucketOperatorSet(
                                    bucket.windowContexts[winIdx],
                                    opIndices,
                                    srcIndices,
                                    context.winOpContexts,
                                    context.chunkArr,
                                    context.chunkContexts,
                                    initialStep);
                        }
                    }, onProcessWindowOperatorSetComplete, onProcessWindowOperatorSetError);
        }


        /**
         * Release the input sources that will not be needed for the rest of this update.
         * <p>
         * For every source slot that takes part in caching, the reference count is decremented; when it reaches zero
         * the row set registered for the slot is closed, the slot is cleared, and any objects held by the cache are
         * nulled out so they are not kept reachable.
         * <p>
         * The entry in {@code maybeCachedInputSources} is whatever {@code WritableRedirectedColumnSource.maybeRedirect}
         * returned in {@code createCachedColumnSource}, so it is not necessarily the dense array source itself. When it
         * is not an {@link ObjectArraySource}, the dense cache referenced from {@code inputSourceCaches} is inspected
         * instead, so object caches are cleared in both cases.
         *
         * @param sources the input source slots whose use just ended
         */
        private void releaseInputSources(int[] sources) {
            try (final ResettableWritableObjectChunk backingChunk =
                    ResettableWritableObjectChunk.makeResettableChunk()) {
                for (int srcIdx : sources) {
                    if (!inputSourceCacheNeeded[srcIdx]) {
                        continue;
                    }

                    if (inputSourceReferenceCounts.decrementAndGet(srcIdx) != 0) {
                        // somebody else still needs this source
                        continue;
                    }

                    // Last use of this set, let's clean up
                    try (final RowSet rows = inputSourceRowSets.get(srcIdx)) {
                        // locate the array source that actually stores the cached objects (if any)
                        Object objectCache = maybeCachedInputSources[srcIdx];
                        if (!(objectCache instanceof ObjectArraySource) && inputSourceCaches[srcIdx] != null) {
                            objectCache = inputSourceCaches[srcIdx].get();
                        }

                        // release any objects we are holding in the cache
                        if (objectCache instanceof ObjectArraySource && rows != null) {
                            final ObjectArraySource objectSource = (ObjectArraySource) objectCache;
                            final long targetCapacity = rows.size();
                            for (long positionToNull = 0; positionToNull < targetCapacity; positionToNull +=
                                    backingChunk.size()) {
                                objectSource.resetWritableChunkToBackingStore(backingChunk, positionToNull);
                                backingChunk.fillWithNullValue(0, backingChunk.size());
                            }
                        }
                        inputSourceRowSets.set(srcIdx, null);
                        maybeCachedInputSources[srcIdx] = null;
                    }
                }
            }
        }

        /**
         * Clean up the resources created during this update and notify downstream if applicable. Calls
         * {@code onCleanupComplete} when the work is complete
         * <p>
         * The order of the steps matters: the downstream update is computed while the bucket state is still available,
         * and only then are the buckets finalized and the remaining resources released.
         *
         * @param onCleanupComplete invoked as the very last step, after cleanup and notification
         */
        private void cleanUpAndNotify(final Runnable onCleanupComplete) {
            // create the downstream before calling finalize() on the buckets (which releases resources);
            // nothing is sent downstream on the initial step
            final TableUpdate downstream = initialStep ? null : computeDownstreamUpdate();

            // allow the helpers to release their resources
            for (UpdateByBucketHelper bucket : dirtyBuckets) {
                bucket.finalizeUpdate();
            }

            // pass the result downstream
            if (downstream != null) {
                result().notifyListeners(downstream);
            }

            // clear the sparse output columns for rows that no longer exist
            if (!initialStep && !redirHelper.isRedirected() && !toClear.isEmpty()) {
                forAllOperators(op -> op.clearOutputRows(toClear));
            }

            // release remaining resources
            SafeCloseable.closeAll(changedRows, toClear);
            upstream.release();

            // accumulate performance data
            final BasePerformanceEntry accumulated = jobScheduler.getAccumulatedPerformance();
            if (accumulated != null) {
                if (initialStep) {
                    // initial step: attribute the work to the enclosing query performance nugget
                    QueryPerformanceRecorder.getInstance().getEnclosingNugget().accumulate(accumulated);
                } else {
                    // later steps: attribute the work to the source listener's entry via a terminal notification
                    source.getUpdateGraph().addNotification(new TerminalNotification() {
                        @Override
                        public void run() {
                            final PerformanceEntry entry = sourceListener().getEntry();
                            if (entry != null) {
                                entry.accumulate(accumulated);
                            }
                        }
                    });
                }
            }

            // continue
            onCleanupComplete.run();
        }

        /**
         * Assemble the {@link TableUpdate} for downstream listeners. Adds, removes and shifts are taken from the
         * upstream update; the modified rows and modified columns are the union of the upstream modifies and what the
         * dirty window buckets report, minus the rows that were added.
         *
         * @return the update to send downstream
         */
        private TableUpdate computeDownstreamUpdate() {
            final TableUpdateImpl update = new TableUpdateImpl();

            // adds/removes are copied because TableUpdateImpl#reset will close them with the upstream update
            update.added = upstream.added().copy();
            update.removed = upstream.removed().copy();
            update.shifted = upstream.shifted();

            // start from an empty modified column set owned by the result table
            update.modifiedColumnSet = result().getModifiedColumnSetForUpdates();
            update.modifiedColumnSet.clear();

            // start from the upstream modified rows; the window buckets add to this below
            final WritableRowSet allModified = upstream.modified().copy();
            update.modified = allModified;

            final boolean upstreamHasModifies = upstream.modified().isNonempty();
            if (upstreamHasModifies) {
                mcsTransformer().transform(upstream.modifiedColumnSet(), update.modifiedColumnSet);
            }

            for (final UpdateByBucketHelper dirtyBucket : dirtyBuckets) {
                int windowIndex = 0;
                for (final UpdateByWindow window : windows) {
                    final UpdateByWindow.UpdateByWindowBucketContext windowContext =
                            dirtyBucket.windowContexts[windowIndex++];

                    if (!window.isWindowBucketDirty(windowContext)) {
                        continue;
                    }

                    // rows this window touched in this bucket
                    allModified.insert(window.getAffectedRows(windowContext));

                    // output columns this window touched; these column sets were created directly from the result
                    // output columns so no transformer is needed
                    for (final int dirtyOperator : window.getDirtyOperators(windowContext)) {
                        window.operators[dirtyOperator].extractDownstreamModifiedColumnSet(upstream, update);
                    }
                }
            }

            // upstream adds must not show up as modifies
            allModified.remove(update.added);

            return update;
        }

        /**
         * Handle a failure raised while processing this update: release the resources held for the update, notify
         * downstream listeners of the failure (not on the initial step), and complete the result future exceptionally.
         * <p>
         * The future is completed in a {@code finally} block so that it is completed even when cleanup or error
         * delivery throws; any such secondary exception still propagates to the caller. NOTE(review): presumably this
         * is the future that {@code UpdateByListener#satisfied} polls - if it were never completed the listener could
         * never report itself satisfied; confirm against {@code processUpdate}.
         *
         * @param error the failure to report
         */
        private void onError(@NotNull final Exception error) {
            try {
                // Ensure that cleanup happens
                cleanUpAfterError();
                if (!initialStep) {
                    // Dispatch a notification downstream
                    deliverUpdateError(error, sourceListener().getEntry(), false);
                }
            } finally {
                // Use the Future to signal that an exception has occurred
                waitForResult.completeExceptionally(error);
            }
        }

        /**
         * Release everything that was allocated for this update after a failure: the window bucket contexts of all
         * dirty windows, the bucket helpers, the tracking row sets and the upstream update.
         */
        private void cleanUpAfterError() {
            final int[] dirtyWindowIndices = dirtyWindows.stream().toArray();

            for (final UpdateByBucketHelper dirtyBucket : dirtyBuckets) {
                // finalize the dirty window contexts of this bucket first ...
                for (final int windowIndex : dirtyWindowIndices) {
                    final UpdateByWindow.UpdateByWindowBucketContext windowContext =
                            dirtyBucket.windowContexts[windowIndex];
                    if (windowContext.isDirty) {
                        windows[windowIndex].finalizeWindowBucket(windowContext);
                    }
                }
                // ... then let the bucket helper release its own resources
                dirtyBucket.finalizeUpdate();
            }

            SafeCloseable.closeAll(changedRows, toClear);

            upstream.release();
        }
    }

    /**
     * Disconnect result from the {@link PeriodicUpdateGraph}, deliver downstream failure notifications, and cleanup if
     * needed.
     * <p>
     * Only the invocation for which {@code forceReferenceCountToZero()} returns {@code true} delivers the failure;
     * every other invocation returns without doing anything.
     *
     * @param error The {@link Throwable} to deliver, either from upstream or update processing
     * @param sourceEntry The {@link TableListener.Entry} to associate with failure messages
     * @param bucketCleanupNeeded Whether to clean up the buckets; unnecessary if the caller has already done this
     * @throws UncheckedTableException if the secondary client error notification fails with an {@link IOException};
     *         the cause is {@code error} and the {@link IOException} is attached as a suppressed exception
     */
    void deliverUpdateError(
            @NotNull final Throwable error,
            @Nullable final TableListener.Entry sourceEntry,
            final boolean bucketCleanupNeeded) {

        final QueryTable result = result();
        if (!result.forceReferenceCountToZero()) {
            // No work to do here, another invocation is responsible for delivering failures.
            return;
        }

        if (bucketCleanupNeeded) {
            buckets.stream().filter(UpdateByBucketHelper::isDirty).forEach(UpdateByBucketHelper::finalizeUpdate);
        }

        result.notifyListenersOnError(error, sourceEntry);

        // Secondary notification to client error monitoring
        try {
            if (SystemicObjectTracker.isSystemic(result)) {
                AsyncClientErrorNotifier.reportError(error);
            }
        } catch (IOException e) {
            final UncheckedTableException wrapped = new UncheckedTableException(
                    "Exception while delivering async client error notification for " + sourceEntry, error);
            // keep the original error as the cause, but do not lose the reason the notification itself failed
            wrapped.addSuppressed(e);
            throw wrapped;
        }
    }

    /**
     * Apply {@code consumer} to every operator of every window, window by window in array order.
     *
     * @param consumer the action to run for each operator
     */
    void forAllOperators(Consumer consumer) {
        final UpdateByWindow[] allWindows = windows;
        for (int windowIndex = 0; windowIndex < allWindows.length; windowIndex++) {
            final UpdateByOperator[] windowOperators = allWindows[windowIndex].operators;
            for (int operatorIndex = 0; operatorIndex < windowOperators.length; operatorIndex++) {
                consumer.accept(windowOperators[operatorIndex]);
            }
        }
    }

    /**
     * The listener on the source table that starts UpdateBy processing for an upstream update. It may only execute
     * once {@link #upstreamSatisfied(long)} reports that the inputs are satisfied, and it does not report itself as
     * satisfied until the processing it started has finished.
     */
    class UpdateByListener extends InstrumentedTableUpdateListenerAdapter {

        /** Future of the processing started by the latest update; {@code null} when nothing is outstanding. */
        private volatile Future processingFuture;

        private UpdateByListener() {
            super(UpdateBy.this + "-SourceListener", UpdateBy.this.source, false);
        }

        @Override
        public void onUpdate(@NotNull final TableUpdate upstream) {
            // A pending bucket update failure takes precedence over processing this update
            if (maybeDeliverPendingFailure()) {
                return;
            }

            final QueryTable result = result();
            if (!result.isFailed()) {
                // Normal path: kick off processing and remember the future so satisfied() can track it
                processingFuture = new PhasedUpdateProcessor(upstream.acquire(), false).processUpdate();
                return;
            }

            // A failure in bucketing or bucket creation was already delivered; it must have been on this step
            Assert.eq(result.getLastNotificationStep(), "result.getLastNotificationStep()",
                    getUpdateGraph().clock().currentStep(),
                    "getUpdateGraph().clock().currentStep()");
        }

        @Override
        public void onFailureInternal(@NotNull final Throwable originalException, @Nullable final Entry sourceEntry) {
            deliverUpdateError(originalException, sourceEntry, true);
        }

        @Override
        public boolean canExecute(final long step) {
            return upstreamSatisfied(step);
        }

        @Override
        public boolean satisfied(final long step) {
            // Parents not yet satisfied on this step, or our notification is enqueued but has not run
            if (!super.satisfied(step)) {
                return false;
            }

            // From here on: parents are satisfied, and our notification will never be enqueued or has been run.
            // Read the volatile field once so the checks below see a consistent value.
            final Future pending = processingFuture;
            if (pending == null) {
                // No notification was enqueued, or completion was already observed earlier
                return true;
            }
            if (!pending.isDone()) {
                // Processing continues asynchronously
                return false;
            }

            // Completion observed for the first time; forget the future
            processingFuture = null;
            return true;
        }
    }

    /**
     * Create a new {@link UpdateByListener} bound to this instance. The listener's constructor is private, so this is
     * the way for other code to obtain one.
     *
     * @return a new listener
     */
    UpdateByListener newUpdateByListener() {
        return new UpdateByListener();
    }

    /** @return the result table; downstream updates and failures are delivered to its listeners */
    protected abstract QueryTable result();

    /** @return the listener on the source table; its entry is used for error and performance attribution */
    protected abstract UpdateByListener sourceListener();

    /** @return the transformer used to map the upstream modified column set onto the downstream one */
    protected abstract ModifiedColumnSet.Transformer mcsTransformer();

    /**
     * @param step the update graph step being checked
     * @return whether the upstream inputs are satisfied for {@code step}; gates {@link UpdateByListener#canExecute}
     */
    protected abstract boolean upstreamSatisfied(final long step);

    /**
     * Deliver a pending failure, if there is one.
     *
     * @return {@code true} if a failure was delivered, in which case the listener skips processing the update
     */
    protected abstract boolean maybeDeliverPendingFailure();

    // region UpdateBy implementation

    /**
     * This helper class will process the update by clauses and organize the operators into windows that can be
     * processed together. The operators are not fully initialized; the input sources and output sources will not be
     * assigned or created until {@link UpdateByOperator#initializeSources(Table, RowRedirection)} is called.
     */
    public static class UpdateByOperatorCollection {
        /**
         * The table definition used to initialize the operator collection.
         */
        final TableDefinition tableDef;

        /**
         * The name of the timestamp column, if any.
         */
        final String timestampColumnName;

        /**
         * The names of the unique input columns used by the update operators.
         */
        final String[] inputColumnNames;

        /**
         * The names of the output columns created by the update operators, in the order specified by the updateBy call.
         */
        final String[] outputColumnNames;

        /**
         * The names of the columns used to group the input rows into buckets. Empty if no key is specified.
         */
        final String[] byColumnNames;

        /**
         * The names of the columns that are preserved without being overridden by the update operators.
         */
        final String[] preservedColumnNames;

        /**
         * The description of the updateBy operator collection (for error reporting or logging).
         */
        final String description;

        /**
         * Contains the operators for this updateBy call, organized into windows that can be processed together.
         */
        final UpdateByWindow[] windowArr;

        private UpdateByOperatorCollection(
                final TableDefinition tableDef,
                final String timestampColumnName,
                final String[] inputColumnNames,
                final String[] outputColumnNames,
                final String[] byColumnNames,
                final String[] preservedColumnNames,
                final String description,
                final UpdateByWindow[] windowArr) {
            this.tableDef = tableDef;
            this.timestampColumnName = timestampColumnName;
            this.inputColumnNames = inputColumnNames;
            this.outputColumnNames = outputColumnNames;
            this.byColumnNames = byColumnNames;
            this.preservedColumnNames = preservedColumnNames;
            this.description = description;
            this.windowArr = windowArr;
        }

        public static UpdateByOperatorCollection from(
                @NotNull final TableDefinition tableDef,
                @NotNull final UpdateByControl control,
                @NotNull final Collection clauses,
                @NotNull final Collection byColumns) {

            // TODO(deephaven-core#2693): Improve UpdateByBucketHelper implementation for ColumnName

            final UpdateByOperatorFactory updateByOperatorFactory =
                    new UpdateByOperatorFactory(tableDef, MatchPair.fromPairs(byColumns), control);

            final Collection> windowSpecs =
                    updateByOperatorFactory.getWindowOperatorSpecs(clauses);
            if (windowSpecs.isEmpty()) {
                throw new IllegalArgumentException("At least one operator must be specified");
            }

            final StringBuilder descriptionBuilder = new StringBuilder("updateBy(ops={")
                    .append(updateByOperatorFactory.describe(clauses))
                    .append("}");

            final MutableObject timestampColumnName = new MutableObject<>(null);
            // Track which columns are preserved without being overridden by the update operators.
            final LinkedHashSet preservedColumnSet = new LinkedHashSet<>(tableDef.getColumnNameSet());

            final Set problems = new LinkedHashSet<>();
            final Set opResultColumnSet = new HashSet<>();

            final ArrayList inputColumnList = new ArrayList<>();
            final TObjectIntHashMap inputColumnToSlotMap = new TObjectIntHashMap<>();

            final UpdateByWindow[] windowArr = windowSpecs.stream().map(clauseList -> {
                final UpdateByOperator[] windowOps =
                        updateByOperatorFactory.getOperators(clauseList).toArray(UpdateByOperator[]::new);
                final int[][] windowOpSourceSlots = new int[windowOps.length][];

                for (int opIdx = 0; opIdx < windowOps.length; opIdx++) {
                    final UpdateByOperator op = windowOps[opIdx];

                    // Verify no collision in the resulting column names and mark which columns are preserved.
                    final String[] opResultNames = op.getOutputColumnNames();
                    for (final String name : opResultNames) {
                        if (opResultColumnSet.contains(name)) {
                            problems.add(name);
                        }
                        opResultColumnSet.add(name);
                        // remove overridden source columns
                        preservedColumnSet.remove(name);
                    }

                    // Verify no variance in the timestamp column names.
                    if (op.getTimestampColumnName() != null) {
                        if (timestampColumnName.getValue() == null) {
                            timestampColumnName.setValue(op.getTimestampColumnName());
                        } else if (!timestampColumnName.getValue().equals(op.getTimestampColumnName())) {
                            throw new UncheckedTableException(
                                    "Cannot reference more than one timestamp source on a single UpdateBy call {"
                                            + timestampColumnName + ", " + op.getTimestampColumnName() + "}");
                        }
                    }

                    // Iterate over each input column and map this operator to unique source
                    final String[] inputColumnNames = op.getInputColumnNames();
                    windowOpSourceSlots[opIdx] = new int[inputColumnNames.length];

                    for (int colIdx = 0; colIdx < inputColumnNames.length; colIdx++) {
                        final String name = inputColumnNames[colIdx];
                        final int maybeExistingSlot = inputColumnToSlotMap.get(name);
                        if (maybeExistingSlot == inputColumnToSlotMap.getNoEntryValue()) {
                            // create a new input source
                            final int srcIdx = inputColumnList.size();
                            inputColumnList.add(name);
                            inputColumnToSlotMap.put(name, srcIdx);
                            // map the window operator indices to this new source
                            windowOpSourceSlots[opIdx][colIdx] = srcIdx;
                        } else {
                            // map the window indices to this existing source
                            windowOpSourceSlots[opIdx][colIdx] = maybeExistingSlot;
                        }
                    }
                }

                return UpdateByWindow.createFromOperatorArray(windowOps, windowOpSourceSlots);
            }).toArray(UpdateByWindow[]::new);

            if (!problems.isEmpty()) {
                throw new UncheckedTableException(descriptionBuilder + ": resulting column names must be unique {" +
                        String.join(", ", problems) + "}");
            }

            if (!byColumns.isEmpty()) {
                descriptionBuilder.append(", byColumns={").append(byColumns).append("})");

                // Verify the source has all the byColumns
                final Set byColumnSet = tableDef.getColumnNameSet();
                for (final ColumnName byColumn : byColumns) {
                    if (!byColumnSet.contains(byColumn.name())) {
                        problems.add(byColumn.name());
                    }
                }

                if (!problems.isEmpty()) {
                    throw new UncheckedTableException(
                            descriptionBuilder + ": Missing byColumns in parent table {" +
                                    String.join(", ", problems) + "}");
                }
            }

            return new UpdateByOperatorCollection(
                    tableDef,
                    timestampColumnName.getValue(),
                    inputColumnList.toArray(String[]::new),
                    updateByOperatorFactory.getOutputColumns(clauses).toArray(String[]::new),
                    byColumns.stream().map(ColumnName::name).toArray(String[]::new),
                    preservedColumnSet.toArray(String[]::new),
                    descriptionBuilder.toString(),
                    windowArr);
        }

        /**
         * Build a new collection whose windows are obtained by calling {@code copy()} on each window of this
         * collection, in the same order. The table definition, timestamp column name, column-name arrays and
         * description are handed to the new collection unchanged.
         *
         * @return the newly constructed collection
         */
        public UpdateByOperatorCollection copy() {
            // Duplicate every window, keeping the original ordering.
            final UpdateByWindow[] copiedWindows = Arrays.stream(windowArr)
                    .map(window -> window.copy())
                    .toArray(UpdateByWindow[]::new);

            return new UpdateByOperatorCollection(
                    tableDef,
                    timestampColumnName,
                    inputColumnNames,
                    outputColumnNames,
                    byColumnNames,
                    preservedColumnNames,
                    description,
                    copiedWindows);
        }
    }

    /**
     * Build an {@link UpdateByOperatorCollection} from the source table's definition, the control, the clauses and
     * the by-columns, then apply that collection to the source table.
     *
     * @param source the source to apply to.
     * @param clauses the clauses handed to {@code UpdateByOperatorCollection.from}.
     * @param byColumns the by-columns handed to {@code UpdateByOperatorCollection.from}.
     * @param control the control used both to build the operator collection and to apply it.
     * @return the table produced by applying the operator collection to the source.
     */
    public static Table updateBy(@NotNull final QueryTable source,
            @NotNull final Collection clauses,
            @NotNull final Collection byColumns,
            @NotNull final UpdateByControl control) {
        return updateBy(
                source,
                UpdateByOperatorCollection.from(source.getDefinition(), control, clauses, byColumns),
                control);
    }

    /**
     * Apply the specified operations to each group of rows in the source table and produce a result table with the same
     * row set as the source with each operator applied.
     *
     * @param source the source to apply to.
     * @param operatorCollection the collection of operations to apply.
     * @return a new table with the same index as the source with all the operations applied.
     */
    public static Table updateBy(@NotNull final QueryTable source,
            @NotNull final UpdateByOperatorCollection operatorCollection,
            @NotNull final UpdateByControl control) {

        QueryTable.checkInitiateOperation(source);
        // Assert that the operator collection and source table definitions are compatible.
        operatorCollection.tableDef.checkMutualCompatibility(
                source.getDefinition(),
                "OperatorCollection TableDef",
                "Source TableDef");

        // Create the rowRedirection (if instructed by the user)
        final RowRedirection rowRedirection;
        if (control.useRedirectionOrDefault()) {
            if (!source.isRefreshing()) {
                if (!source.isFlat() && SparseConstants.sparseStructureExceedsOverhead(source.getRowSet(),
                        control.maxStaticSparseMemoryOverheadOrDefault())) {
                    rowRedirection = new InverseWrappedRowSetRowRedirection(source.getRowSet());
                } else {
                    rowRedirection = null;
                }
            } else {
                final JoinControl.RedirectionType type = JoinControl.getRedirectionType(source, 4.0, true);
                switch (type) {
                    case Sparse:
                        rowRedirection = new LongColumnSourceWritableRowRedirection(new LongSparseArraySource());
                        break;
                    case Hash:
                        rowRedirection = WritableRowRedirection.FACTORY.createRowRedirection(source.intSize());
                        break;

                    default:
                        throw new IllegalStateException("Unsupported redirection type " + type);
                }
            }
        } else {
            rowRedirection = null;
        }

        final ColumnSource[] inputSourceArr = Arrays.stream(operatorCollection.inputColumnNames)
                .map(colName -> ReinterpretUtils.maybeConvertToPrimitive(source.getColumnSource(colName)))
                .toArray(ColumnSource[]::new);

        final Map> resultSources = new LinkedHashMap<>(source.getColumnSourceMap());

        // We have the source table and the row redirection; we can initialize the operators and add the output
        // columns to the result sources
        for (UpdateByWindow win : operatorCollection.windowArr) {
            for (UpdateByOperator op : win.operators) {
                op.initializeSources(source, rowRedirection);
                resultSources.putAll(op.getOutputColumns());
            }
        }

        if (operatorCollection.byColumnNames.length == 0) {
            return LivenessScopeStack.computeEnclosed(() -> {
                final ZeroKeyUpdateByManager zkm = new ZeroKeyUpdateByManager(
                        operatorCollection.windowArr,
                        inputSourceArr,
                        source,
                        operatorCollection.preservedColumnNames,
                        resultSources,
                        operatorCollection.timestampColumnName,
                        rowRedirection,
                        control);

                if (source.isRefreshing()) {
                    // Tracking previous values
                    if (rowRedirection != null) {
                        rowRedirection.writableCast().startTrackingPrevValues();
                    }
                    for (UpdateByWindow win : operatorCollection.windowArr) {
                        for (UpdateByOperator op : win.operators) {
                            op.startTrackingPrev();
                        }
                    }
                }
                return zkm.result();
            }, source::isRefreshing, DynamicNode::isRefreshing);
        }

        // TODO: test whether the source is static and that UpdateBy call uses only cumulative operators. In this
        // case, we can use an optimized manager and a single pass through the column sources (DHC #3393)

        return LivenessScopeStack.computeEnclosed(() -> {
            final BucketedPartitionedUpdateByManager bm = new BucketedPartitionedUpdateByManager(
                    operatorCollection.windowArr,
                    inputSourceArr,
                    source,
                    operatorCollection.preservedColumnNames,
                    resultSources,
                    operatorCollection.byColumnNames,
                    operatorCollection.timestampColumnName,
                    rowRedirection,
                    control);

            if (source.isRefreshing()) {
                // Tracking previous values
                if (rowRedirection != null) {
                    rowRedirection.writableCast().startTrackingPrevValues();
                }
                for (UpdateByWindow win : operatorCollection.windowArr) {
                    for (UpdateByOperator op : win.operators) {
                        op.startTrackingPrev();
                    }
                }
            }
            return bm.result();
        }, source::isRefreshing, DynamicNode::isRefreshing);
    }
    // endregion
}