/**
* Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending
*/
package io.deephaven.engine.table.impl.by;
import gnu.trove.impl.Constants;
import gnu.trove.map.TObjectIntMap;
import gnu.trove.map.hash.TObjectIntHashMap;
import io.deephaven.api.ColumnName;
import io.deephaven.base.Pair;
import io.deephaven.base.verify.Assert;
import io.deephaven.base.verify.Require;
import io.deephaven.chunk.*;
import io.deephaven.chunk.attributes.ChunkLengths;
import io.deephaven.chunk.attributes.ChunkPositions;
import io.deephaven.chunk.attributes.Values;
import io.deephaven.configuration.Configuration;
import io.deephaven.engine.context.ExecutionContext;
import io.deephaven.engine.rowset.*;
import io.deephaven.engine.rowset.chunkattributes.OrderedRowKeys;
import io.deephaven.engine.rowset.chunkattributes.RowKeys;
import io.deephaven.engine.table.*;
import io.deephaven.engine.table.impl.*;
import io.deephaven.engine.table.impl.NoSuchColumnException.Type;
import io.deephaven.engine.table.impl.by.typed.TypedHasherFactory;
import io.deephaven.engine.table.impl.indexer.RowSetIndexer;
import io.deephaven.engine.table.impl.remote.ConstructSnapshot;
import io.deephaven.engine.table.impl.sort.findruns.IntFindRunsKernel;
import io.deephaven.engine.table.impl.sort.permute.LongPermuteKernel;
import io.deephaven.engine.table.impl.sort.permute.PermuteKernel;
import io.deephaven.engine.table.impl.sort.timsort.IntIntTimsortKernel;
import io.deephaven.engine.table.impl.sources.ArrayBackedColumnSource;
import io.deephaven.engine.table.impl.sources.ObjectArraySource;
import io.deephaven.engine.table.impl.sources.ReinterpretUtils;
import io.deephaven.engine.table.impl.sources.regioned.SymbolTableSource;
import io.deephaven.engine.table.impl.util.ChunkUtils;
import io.deephaven.engine.table.impl.util.UpdateSizeCalculator;
import io.deephaven.util.SafeCloseable;
import io.deephaven.util.SafeCloseableArray;
import io.deephaven.util.SafeCloseableList;
import io.deephaven.util.annotations.ReferentialIntegrity;
import io.deephaven.util.annotations.VisibleForTesting;
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.commons.lang3.mutable.MutableObject;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.util.*;
import java.util.function.LongFunction;
import java.util.function.Supplier;
import java.util.function.UnaryOperator;
import static io.deephaven.engine.table.impl.by.AggregationRowLookup.DEFAULT_UNKNOWN_ROW;
import static io.deephaven.engine.table.impl.by.AggregationRowLookup.EMPTY_KEY;
@SuppressWarnings("rawtypes")
public class ChunkedOperatorAggregationHelper {
static final int CHUNK_SIZE =
Configuration.getInstance().getIntegerWithDefault("ChunkedOperatorAggregationHelper.chunkSize", 1 << 12);
public static final boolean SKIP_RUN_FIND =
Configuration.getInstance().getBooleanWithDefault("ChunkedOperatorAggregationHelper.skipRunFind", false);
static final boolean HASHED_RUN_FIND =
Configuration.getInstance().getBooleanWithDefault("ChunkedOperatorAggregationHelper.hashedRunFind", true);
static boolean USE_OPEN_ADDRESSED_STATE_MANAGER =
Configuration.getInstance().getBooleanWithDefault(
"ChunkedOperatorAggregationHelper.useOpenAddressedStateManager",
true);
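// Note: these tuning knobs are read once, at class load time, from the Deephaven Configuration
// instance. Depending on how a deployment sources its configuration, they can typically be overridden
// via property files or JVM system properties, e.g. (hypothetical values):
// -DChunkedOperatorAggregationHelper.chunkSize=8192
// -DChunkedOperatorAggregationHelper.hashedRunFind=false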
public static QueryTable aggregation(
@NotNull final AggregationContextFactory aggregationContextFactory,
@NotNull final QueryTable input,
final boolean preserveEmpty,
@Nullable final Table initialKeys,
@NotNull final Collection<? extends ColumnName> groupByColumns) {
return aggregation(AggregationControl.DEFAULT_FOR_OPERATOR,
aggregationContextFactory, input, preserveEmpty, initialKeys, groupByColumns);
}
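// A minimal call sketch (hypothetical names; the context factory would come from an
// AggregationContextFactory implementation chosen by the caller):
//
// QueryTable result = ChunkedOperatorAggregationHelper.aggregation(
//         contextFactory, // operators to compute per group
//         inputTable, // the QueryTable to aggregate
//         false, // do not preserve empty states
//         null, // no initial groups
//         List.of(ColumnName.of("Sym"))); // group by the "Sym" column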
private static void checkGroupByColumns(String context, TableDefinition tableDefinition, String[] keyNames) {
NoSuchColumnException.throwIf(
tableDefinition.getColumnNameSet(),
Arrays.asList(keyNames),
String.format(
"aggregation: not all group-by columns [%%s] are present in %s with columns [%%s]. Missing columns: [%%s]",
context),
Type.REQUESTED,
Type.AVAILABLE,
Type.MISSING);
}
@VisibleForTesting
public static QueryTable aggregation(
@NotNull final AggregationControl control,
@NotNull final AggregationContextFactory aggregationContextFactory,
@NotNull final QueryTable input,
final boolean preserveEmpty,
@Nullable final Table initialKeys,
@NotNull final Collection<? extends ColumnName> groupByColumns) {
final String[] keyNames = groupByColumns.stream().map(ColumnName::name).toArray(String[]::new);
checkGroupByColumns("input table", input.getDefinition(), keyNames);
if (initialKeys != null) {
if (keyNames.length == 0) {
throw new IllegalArgumentException(
"aggregation: initial groups must not be specified if no group-by columns are specified");
}
checkGroupByColumns("initial groups", initialKeys.getDefinition(), keyNames);
for (final String keyName : keyNames) {
final ColumnDefinition<?> inputDef = input.getDefinition().getColumn(keyName);
final ColumnDefinition<?> initialKeysDef = initialKeys.getDefinition().getColumn(keyName);
if (!inputDef.isCompatible(initialKeysDef)) {
throw new IllegalArgumentException(String.format(
"aggregation: column definition mismatch between input table and initial groups table for %s; input has %s, initial groups has %s",
keyName,
inputDef.describeForCompatibility(),
initialKeysDef.describeForCompatibility()));
}
}
}
final Mutable<QueryTable> resultHolder = new MutableObject<>();
final OperationSnapshotControl snapshotControl =
input.createSnapshotControlIfRefreshing(OperationSnapshotControl::new);
BaseTable.initializeWithSnapshot(
"by(" + aggregationContextFactory + ", " + groupByColumns + ")", snapshotControl,
(usePrev, beforeClockValue) -> {
resultHolder.setValue(aggregation(control, snapshotControl, aggregationContextFactory,
input, preserveEmpty, initialKeys, keyNames, usePrev));
return true;
});
return resultHolder.getValue();
}
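// Note: initializeWithSnapshot drives the lambda above under ConstructSnapshot's consistency protocol;
// usePrev tells the construction whether it must read previous values because the source table is
// mid-cycle, and the attempt is retried if the logical clock ticks during construction.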
private static QueryTable aggregation(
@NotNull final AggregationControl control,
@Nullable final OperationSnapshotControl snapshotControl,
@NotNull final AggregationContextFactory aggregationContextFactory,
@NotNull final QueryTable input,
final boolean preserveEmpty,
@Nullable final Table initialKeys,
@NotNull final String[] keyNames,
final boolean usePrev) {
if (keyNames.length == 0) {
// This should be checked before this method is called, but let's verify here in case an additional
// entry point is added incautiously.
Assert.eqNull(initialKeys, "initialKeys");
return noKeyAggregation(snapshotControl, aggregationContextFactory, input, preserveEmpty, usePrev);
}
final ColumnSource<?>[] keySources =
Arrays.stream(keyNames).map(input::getColumnSource).toArray(ColumnSource[]::new);
final ColumnSource<?>[] reinterpretedKeySources = Arrays.stream(keySources)
.map(ReinterpretUtils::maybeConvertToPrimitive).toArray(ColumnSource[]::new);
final AggregationContext ac = aggregationContextFactory.makeAggregationContext(
input, input.isRefreshing() && !preserveEmpty, keyNames);
final PermuteKernel[] permuteKernels = ac.makePermuteKernels();
final boolean useGrouping;
if (control.considerGrouping(input, keySources)) {
Assert.eq(keySources.length, "keySources.length", 1);
final boolean hasGrouping = RowSetIndexer.of(input.getRowSet()).hasGrouping(keySources[0]);
if (!input.isRefreshing() && hasGrouping && initialKeys == null) {
return staticGroupedAggregation(input, keyNames[0], keySources[0], ac);
}
// we have no hasPrevGrouping method
useGrouping = !usePrev && hasGrouping && Arrays.equals(reinterpretedKeySources, keySources);
} else {
useGrouping = false;
}
final Table symbolTable;
final boolean useSymbolTable;
if (!input.isRefreshing() && control.considerSymbolTables(input, useGrouping, keySources)) {
Assert.eq(keySources.length, "keySources.length", 1);
symbolTable = ((SymbolTableSource<?>) keySources[0]).getStaticSymbolTable(input.getRowSet(),
control.useSymbolTableLookupCaching());
useSymbolTable = control.useSymbolTables(input.size(), symbolTable.size());
} else {
symbolTable = null;
useSymbolTable = false;
}
final MutableInt outputPosition = new MutableInt();
final Supplier<OperatorAggregationStateManager> stateManagerSupplier =
() -> makeStateManager(control, input, keySources, reinterpretedKeySources, ac,
useSymbolTable ? symbolTable : null);
final OperatorAggregationStateManager stateManager;
if (initialKeys == null) {
stateManager = stateManagerSupplier.get();
} else {
stateManager = initialKeyTableAddition(control, initialKeys, keyNames, ac, outputPosition,
stateManagerSupplier);
}
final RowSetBuilderRandom initialRowsBuilder =
initialKeys != null && !preserveEmpty ? new BitmapRandomBuilder(stateManager.maxTableSize() - 1) : null;
if (useGrouping) {
initialGroupedKeyAddition(input, reinterpretedKeySources, ac, stateManager, outputPosition,
initialRowsBuilder, usePrev);
} else {
initialBucketedKeyAddition(input, reinterpretedKeySources, ac, permuteKernels, stateManager,
outputPosition, initialRowsBuilder, usePrev);
}
// Construct and return result table
final ColumnSource[] keyHashTableSources = stateManager.getKeyHashTableSources();
final Map<String, ColumnSource<?>> resultColumnSourceMap = new LinkedHashMap<>();
// Gather the result key columns
final ColumnSource[] keyColumnsRaw = new ColumnSource[keyHashTableSources.length];
final WritableColumnSource[] keyColumnsCopied =
input.isRefreshing() ? new WritableColumnSource[keyHashTableSources.length] : null;
for (int kci = 0; kci < keyHashTableSources.length; ++kci) {
ColumnSource<?> resultKeyColumnSource = keyHashTableSources[kci];
if (keySources[kci] != reinterpretedKeySources[kci]) {
resultKeyColumnSource = ReinterpretUtils.convertToOriginalType(keySources[kci], resultKeyColumnSource);
}
keyColumnsRaw[kci] = resultKeyColumnSource;
if (input.isRefreshing()) {
// noinspection ConstantConditions,unchecked
keyColumnsCopied[kci] = ArrayBackedColumnSource.getMemoryColumnSource(outputPosition.intValue(),
keyColumnsRaw[kci].getType());
resultColumnSourceMap.put(keyNames[kci], keyColumnsCopied[kci]);
} else {
resultColumnSourceMap.put(keyNames[kci], keyColumnsRaw[kci]);
}
}
ac.getResultColumns(resultColumnSourceMap);
final TrackingWritableRowSet resultRowSet = (initialRowsBuilder == null
? RowSetFactory.flat(outputPosition.intValue())
: initialRowsBuilder.build()).toTracking();
if (input.isRefreshing()) {
copyKeyColumns(keyColumnsRaw, keyColumnsCopied, resultRowSet);
}
// Construct the result table
final QueryTable result = new QueryTable(resultRowSet, resultColumnSourceMap);
ac.propagateInitialStateToOperators(result, outputPosition.intValue());
if (input.isRefreshing()) {
assert keyColumnsCopied != null;
ac.startTrackingPrevValues();
final IncrementalOperatorAggregationStateManager incrementalStateManager =
(IncrementalOperatorAggregationStateManager) stateManager;
incrementalStateManager.startTrackingPrevValues();
final boolean isBlink = input.isBlink();
final TableUpdateListener listener =
new BaseTable.ListenerImpl("by(" + aggregationContextFactory + ")", input, result) {
@ReferentialIntegrity
final OperationSnapshotControl swapListenerHardReference = snapshotControl;
final ModifiedColumnSet keysUpstreamModifiedColumnSet = input.newModifiedColumnSet(keyNames);
final ModifiedColumnSet[] operatorInputModifiedColumnSets =
ac.getInputModifiedColumnSets(input);
final UnaryOperator<ModifiedColumnSet>[] resultModifiedColumnSetFactories =
ac.initializeRefreshing(result, this);
final StateChangeRecorder stateChangeRecorder =
preserveEmpty ? null : ac.getStateChangeRecorder();
@Override
public void onUpdate(@NotNull final TableUpdate upstream) {
incrementalStateManager.beginUpdateCycle();
final TableUpdate upstreamToUse = isBlink ? adjustForBlinkTable(upstream) : upstream;
if (upstreamToUse.empty()) {
return;
}
final TableUpdate downstream;
try (final KeyedUpdateContext kuc = new KeyedUpdateContext(ac, incrementalStateManager,
reinterpretedKeySources, permuteKernels, keysUpstreamModifiedColumnSet,
operatorInputModifiedColumnSets, stateChangeRecorder, upstreamToUse,
outputPosition)) {
downstream = kuc.computeDownstreamIndicesAndCopyKeys(input.getRowSet(),
keyColumnsRaw,
keyColumnsCopied,
result.getModifiedColumnSetForUpdates(), resultModifiedColumnSetFactories);
}
if (downstream.empty()) {
downstream.release();
return;
}
result.getRowSet().writableCast().update(downstream.added(), downstream.removed());
result.notifyListeners(downstream);
}
@Override
public void onFailureInternal(@NotNull final Throwable originalException, Entry sourceEntry) {
ac.propagateFailureToOperators(originalException, sourceEntry);
super.onFailureInternal(originalException, sourceEntry);
}
};
snapshotControl.setListenerAndResult(listener, result);
}
final QueryTable finalResult = ac.transformResult(result);
final boolean noInitialKeys = initialKeys == null || (!initialKeys.isRefreshing() && initialKeys.isEmpty());
if (!input.isRefreshing() && finalResult.getRowSet().isFlat()) {
finalResult.setFlat();
} else if ((input.isAddOnly() || input.isAppendOnly() || input.isBlink()) && (noInitialKeys || preserveEmpty)) {
finalResult.setFlat();
}
return finalResult;
}
private static OperatorAggregationStateManager makeStateManager(
@NotNull final AggregationControl control, @NotNull final QueryTable input,
@NotNull final ColumnSource<?>[] keySources, @NotNull final ColumnSource<?>[] reinterpretedKeySources,
@NotNull final AggregationContext ac,
@Nullable final Table symbolTableToUse) {
final OperatorAggregationStateManager stateManager;
if (input.isRefreshing()) {
if (USE_OPEN_ADDRESSED_STATE_MANAGER) {
stateManager = TypedHasherFactory.make(
IncrementalChunkedOperatorAggregationStateManagerOpenAddressedBase.class,
reinterpretedKeySources,
keySources, control.initialHashTableSize(input), control.getMaximumLoadFactor(),
control.getTargetLoadFactor());
} else {
stateManager = TypedHasherFactory.make(
IncrementalChunkedOperatorAggregationStateManagerTypedBase.class, reinterpretedKeySources,
keySources, control.initialHashTableSize(input), control.getMaximumLoadFactor(),
control.getTargetLoadFactor());
}
} else {
if (symbolTableToUse != null) {
stateManager = new StaticSymbolTableChunkedOperatorAggregationStateManager(reinterpretedKeySources[0],
symbolTableToUse);
} else if (USE_OPEN_ADDRESSED_STATE_MANAGER) {
stateManager = TypedHasherFactory.make(
StaticChunkedOperatorAggregationStateManagerOpenAddressedBase.class,
reinterpretedKeySources,
keySources, control.initialHashTableSize(input), control.getMaximumLoadFactor(),
control.getTargetLoadFactor());
} else {
stateManager = TypedHasherFactory.make(
StaticChunkedOperatorAggregationStateManagerTypedBase.class, reinterpretedKeySources,
keySources, control.initialHashTableSize(input), control.getMaximumLoadFactor(),
control.getTargetLoadFactor());
}
}
ac.supplyRowLookup(() -> stateManager::findPositionForKey);
return stateManager;
}
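// Note: the row lookup supplied above lets consumers map a group key back to its output row. The state
// manager's findPositionForKey returns the output position for a known key, with DEFAULT_UNKNOWN_ROW
// (statically imported above) expected for keys that have no aggregation state.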
private static TableUpdate adjustForBlinkTable(@NotNull final TableUpdate upstream) {
// Blink table aggregations never have modifies or shifts from their parent:
Assert.assertion(upstream.modified().isEmpty() && upstream.shifted().empty(),
"upstream.modified.empty() && upstream.shifted.empty()");
// Blink table aggregations ignore removes:
if (upstream.removed().isEmpty()) {
return upstream;
}
return new TableUpdateImpl(upstream.added(), RowSetFactory.empty(), upstream.modified(), upstream.shifted(),
upstream.modifiedColumnSet());
}
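// Illustrative example for adjustForBlinkTable (hypothetical row sets): a blink cycle that adds rows
// {100-199} while removing the previous cycle's rows {0-99} is rewritten to an update with
// added = {100-199} and an empty removed set, so the per-cycle retirement of rows never unwinds the
// accumulated aggregation state.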
private static class KeyedUpdateContext implements SafeCloseable {
private final AggregationContext ac;
private final IncrementalOperatorAggregationStateManager incrementalStateManager;
private final ColumnSource[] reinterpretedKeySources;
private final PermuteKernel[] permuteKernels;
private final StateChangeRecorder stateChangeRecorder;
private final TableUpdate upstream; // Not to be mutated
private final MutableInt outputPosition;
private final ModifiedColumnSet updateUpstreamModifiedColumnSet; // Not to be mutated
private final boolean keysModified;
private final boolean shifted;
private final boolean processShifts;
private final OperatorDivision od;
private final RowSetBuilderRandom reincarnatedStatesBuilder;
private final RowSetBuilderRandom emptiedStatesBuilder;
private final RowSetBuilderRandom modifiedStatesBuilder;
private final boolean[] modifiedOperators;
private final SafeCloseableList toClose;
private final IterativeChunkedAggregationOperator.BucketedContext[] bucketedContexts;
private final IntIntTimsortKernel.IntIntSortKernelContext<RowKeys, ChunkPositions> sortKernelContext;
private final HashedRunFinder.HashedRunContext hashedRunContext;
// These are used for all access when only pre- or post-shift (or previous or current) are needed, else for
// pre-shift/previous
private final SharedContext sharedContext;
private final ChunkSource.GetContext[] getContexts;
private final WritableChunk<Values>[] workingChunks;
private final WritableLongChunk<RowKeys> permutedKeyIndices;
// These are used when post-shift/current values are needed concurrently with pre-shift/previous
private final SharedContext postSharedContext;
private final ChunkSource.GetContext[] postGetContexts;
private final WritableChunk<Values>[] postWorkingChunks;
private final WritableLongChunk<RowKeys> postPermutedKeyIndices;
// the valueChunks and postValueChunks arrays never own a chunk; they have a reference to workingChunks or a
// chunk returned from a get context, and thus are not closed by this context
private final Chunk<? extends Values>[] valueChunks;
private final Chunk<? extends Values>[] postValueChunks;
private final WritableIntChunk<ChunkPositions> runStarts;
private final WritableIntChunk<ChunkLengths> runLengths;
private final WritableIntChunk<ChunkPositions> chunkPositions;
private final WritableIntChunk<RowKeys> slots;
private final WritableBooleanChunk<Values> modifiedSlots;
private final WritableBooleanChunk<Values> slotsModifiedByOperator;
private final SafeCloseable bc;
private final int buildChunkSize;
private final SafeCloseable pc;
private final int probeChunkSize;
private KeyedUpdateContext(@NotNull final AggregationContext ac,
@NotNull final IncrementalOperatorAggregationStateManager incrementalStateManager,
@NotNull final ColumnSource[] reinterpretedKeySources,
@NotNull final PermuteKernel[] permuteKernels,
@NotNull final ModifiedColumnSet keysUpstreamModifiedColumnSet,
@NotNull final ModifiedColumnSet[] operatorInputUpstreamModifiedColumnSets,
@Nullable final StateChangeRecorder stateChangeRecorder,
@NotNull final TableUpdate upstream,
@NotNull final MutableInt outputPosition) {
this.ac = ac;
this.incrementalStateManager = incrementalStateManager;
this.reinterpretedKeySources = reinterpretedKeySources;
this.permuteKernels = permuteKernels;
this.stateChangeRecorder = stateChangeRecorder;
this.upstream = upstream;
this.outputPosition = outputPosition;
updateUpstreamModifiedColumnSet =
upstream.modified().isEmpty() ? ModifiedColumnSet.EMPTY : upstream.modifiedColumnSet();
keysModified = updateUpstreamModifiedColumnSet.containsAny(keysUpstreamModifiedColumnSet);
shifted = upstream.shifted().nonempty();
processShifts = ac.requiresIndices() && shifted;
od = new OperatorDivision(ac, upstream.modified().isNonempty(), updateUpstreamModifiedColumnSet,
operatorInputUpstreamModifiedColumnSets);
final long buildSize = Math.max(upstream.added().size(), keysModified ? upstream.modified().size() : 0);
final long probeSizeForModifies =
(keysModified || od.anyOperatorHasModifiedInputColumns || ac.requiresIndices())
? upstream.modified().size()
: 0;
final long probeSizeWithoutShifts = Math.max(upstream.removed().size(), probeSizeForModifies);
final long probeSize =
processShifts
? UpdateSizeCalculator.chunkSize(probeSizeWithoutShifts, upstream.shifted(), CHUNK_SIZE)
: probeSizeWithoutShifts;
buildChunkSize = chunkSize(buildSize);
probeChunkSize = chunkSize(probeSize);
final int chunkSize = Math.max(buildChunkSize, probeChunkSize);
if (stateChangeRecorder != null) {
reincarnatedStatesBuilder = RowSetFactory.builderRandom();
emptiedStatesBuilder = RowSetFactory.builderRandom();
stateChangeRecorder.startRecording(reincarnatedStatesBuilder::addKey, emptiedStatesBuilder::addKey);
} else {
reincarnatedStatesBuilder = new EmptyRandomBuilder();
emptiedStatesBuilder = new EmptyRandomBuilder();
}
modifiedStatesBuilder = new BitmapRandomBuilder(outputPosition.intValue());
modifiedOperators = new boolean[ac.size()];
toClose = new SafeCloseableList();
bucketedContexts = toClose.addArray(new IterativeChunkedAggregationOperator.BucketedContext[ac.size()]);
ac.initializeBucketedContexts(bucketedContexts, upstream, keysModified,
od.operatorsWithModifiedInputColumns);
final boolean findRuns = ac.requiresRunFinds(SKIP_RUN_FIND);
sortKernelContext =
!findRuns || HASHED_RUN_FIND ? null : toClose.add(IntIntTimsortKernel.createContext(chunkSize));
// even if we are not finding runs because of configuration or operators, we may have a shift in which case
// we still need to find runs
hashedRunContext =
!HASHED_RUN_FIND ? null : toClose.add(new HashedRunFinder.HashedRunContext(chunkSize));
sharedContext = toClose.add(SharedContext.makeSharedContext());
getContexts = toClose.addArray(new ChunkSource.GetContext[ac.size()]);
ac.initializeGetContexts(sharedContext, getContexts, chunkSize);
// noinspection unchecked
workingChunks = toClose.addArray(new WritableChunk[ac.size()]);
valueChunks = new Chunk[ac.size()];
postValueChunks = new Chunk[ac.size()];
ac.initializeWorkingChunks(workingChunks, chunkSize);
permutedKeyIndices =
ac.requiresIndices() || keysModified ? toClose.add(WritableLongChunk.makeWritableChunk(chunkSize))
: null;
postPermutedKeyIndices = processShifts || keysModified // Note that we need this for modified keys because
// we use it to hold removed key indices
? toClose.add(WritableLongChunk.makeWritableChunk(chunkSize))
: null;
if (od.anyOperatorHasModifiedInputColumns || processShifts) {
postSharedContext = toClose.add(SharedContext.makeSharedContext());
postGetContexts = toClose.addArray(new ChunkSource.GetContext[ac.size()]);
ac.initializeGetContexts(postSharedContext, postGetContexts, probeChunkSize);
// noinspection unchecked
postWorkingChunks = toClose.addArray(new WritableChunk[ac.size()]);
ac.initializeWorkingChunks(postWorkingChunks, probeChunkSize);
} else {
postSharedContext = null;
postGetContexts = null;
postWorkingChunks = null;
}
runStarts = toClose.add(WritableIntChunk.makeWritableChunk(chunkSize));
runLengths = toClose.add(WritableIntChunk.makeWritableChunk(chunkSize));
chunkPositions = toClose.add(WritableIntChunk.makeWritableChunk(chunkSize));
slots = toClose.add(WritableIntChunk.makeWritableChunk(chunkSize));
modifiedSlots = toClose.add(WritableBooleanChunk.makeWritableChunk(chunkSize));
slotsModifiedByOperator = toClose.add(WritableBooleanChunk.makeWritableChunk(chunkSize));
if (buildSize > 0) {
bc = toClose.add(
incrementalStateManager.makeAggregationStateBuildContext(reinterpretedKeySources, buildSize));
} else {
bc = null;
}
if (probeSize > 0) {
pc = toClose.add(incrementalStateManager.makeProbeContext(reinterpretedKeySources, probeSize));
} else {
pc = null;
}
}
@Override
public final void close() {
toClose.close();
}
private TableUpdate computeDownstreamIndicesAndCopyKeys(
@NotNull final RowSet upstreamIndex,
@NotNull final ColumnSource<?>[] keyColumnsRaw,
@NotNull final WritableColumnSource<?>[] keyColumnsCopied,
@NotNull final ModifiedColumnSet resultModifiedColumnSet,
@NotNull final UnaryOperator<ModifiedColumnSet>[] resultModifiedColumnSetFactories) {
final int firstStateToAdd = outputPosition.intValue();
ac.resetOperatorsForStep(upstream, firstStateToAdd);
if (upstream.removed().isNonempty()) {
doRemoves(upstream.removed());
}
if (upstream.modified().isNonempty() && (od.anyOperatorHasModifiedInputColumns
|| od.anyOperatorWithoutModifiedInputColumnsRequiresIndices || keysModified)) {
try (final ModifySplitResult split =
keysModified ? splitKeyModificationsAndDoKeyChangeRemoves() : null) {
if (processShifts) {
try (final WritableRowSet postShiftRowSet = upstreamIndex.minus(upstream.added())) {
if (keysModified) {
postShiftRowSet.remove(split.keyChangeIndicesPostShift);
}
doShifts(postShiftRowSet); // Also handles shifted same-key modifications for modified-input
// operators that require indices (if any)
}
try (final RowSet keysSameUnshiftedModifies =
keysModified ? null : getUnshiftedModifies()) {
// Do unshifted modifies for everyone
assert !keysModified || split.unshiftedSameSlotIndices != null;
final RowSet unshiftedSameSlotModifies =
keysModified ? split.unshiftedSameSlotIndices : keysSameUnshiftedModifies;
doSameSlotModifies(unshiftedSameSlotModifies, unshiftedSameSlotModifies,
true /* We don't process shifts unless some operator requires indices */,
od.operatorsWithModifiedInputColumns,
od.operatorsWithoutModifiedInputColumnsThatRequireIndices);
if (od.anyOperatorWithModifiedInputColumnsIgnoresIndices) {
// Do shifted same-key modifies for RowSet-only and modified-input operators that don't
// require indices
try (final RowSet removeIndex =
keysModified ? unshiftedSameSlotModifies.union(split.keyChangeIndicesPostShift)
: null;
final RowSet shiftedSameSlotModifiesPost = upstream.modified()
.minus(removeIndex == null ? unshiftedSameSlotModifies : removeIndex);
final WritableRowSet shiftedSameSlotModifiesPre =
shiftedSameSlotModifiesPost.copy()) {
upstream.shifted().unapply(shiftedSameSlotModifiesPre);
doSameSlotModifies(shiftedSameSlotModifiesPre, shiftedSameSlotModifiesPost, true,
od.operatorsWithModifiedInputColumnsThatIgnoreIndices,
od.operatorsThatRequireIndices);
}
} else if (ac.requiresIndices()) {
// Do shifted same-key modifies for RowSet-only operators
try (final WritableRowSet shiftedSameSlotModifiesPost =
upstream.modified().minus(unshiftedSameSlotModifies)) {
if (keysModified) {
shiftedSameSlotModifiesPost.remove(split.keyChangeIndicesPostShift);
}
doSameSlotModifyIndicesOnly(shiftedSameSlotModifiesPost,
od.operatorsThatRequireIndices);
}
}
}
} else if (od.anyOperatorHasModifiedInputColumns) {
assert !keysModified || split.sameSlotIndicesPreShift != null;
assert !keysModified || split.sameSlotIndicesPostShift != null;
doSameSlotModifies(
keysModified ? split.sameSlotIndicesPreShift : upstream.getModifiedPreShift(),
keysModified ? split.sameSlotIndicesPostShift : upstream.modified(),
ac.requiresIndices(),
od.operatorsWithModifiedInputColumns,
od.operatorsWithoutModifiedInputColumnsThatRequireIndices);
} else {
assert !keysModified || split.sameSlotIndicesPostShift != null;
doSameSlotModifyIndicesOnly(
keysModified ? split.sameSlotIndicesPostShift : upstream.modified(),
od.operatorsWithoutModifiedInputColumnsThatRequireIndices);
}
if (keysModified) {
doInserts(split.keyChangeIndicesPostShift, false);
}
}
} else if (processShifts) {
try (final RowSet postShiftRowSet = upstreamIndex.minus(upstream.added())) {
doShifts(postShiftRowSet);
}
}
if (upstream.added().isNonempty()) {
doInserts(upstream.added(), true);
}
if (stateChangeRecorder != null) {
stateChangeRecorder.finishRecording();
}
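// The reconciliation below cancels states that both emptied and reincarnated within this cycle: for
// example (hypothetical), a state that went empty and then received new rows appears in both builders,
// is removed from added and removed alike, and can surface as, at most, a modify.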
final TableUpdateImpl downstream = new TableUpdateImpl();
downstream.shifted = RowSetShiftData.EMPTY;
try (final RowSet newStates = makeNewStatesRowSet(firstStateToAdd, outputPosition.intValue() - 1)) {
downstream.added = reincarnatedStatesBuilder.build();
downstream.removed = emptiedStatesBuilder.build();
try (final RowSet addedBack = downstream.added().intersect(downstream.removed())) {
downstream.added().writableCast().remove(addedBack);
downstream.removed().writableCast().remove(addedBack);
if (newStates.isNonempty()) {
downstream.added().writableCast().insert(newStates);
copyKeyColumns(keyColumnsRaw, keyColumnsCopied, newStates);
}
downstream.modified = modifiedStatesBuilder.build();
downstream.modified().writableCast().remove(downstream.added());
downstream.modified().writableCast().remove(downstream.removed());
}
ac.propagateChangesToOperators(downstream, newStates);
}
extractDownstreamModifiedColumnSet(downstream, resultModifiedColumnSet, modifiedOperators,
updateUpstreamModifiedColumnSet, resultModifiedColumnSetFactories);
return downstream;
}
private void doRemoves(@NotNull final RowSequence keyIndicesToRemove) {
if (keyIndicesToRemove.isEmpty()) {
return;
}
try (final RowSequence.Iterator keyIndicesToRemoveIterator = keyIndicesToRemove.getRowSequenceIterator()) {
while (keyIndicesToRemoveIterator.hasMore()) {
doRemovesForChunk(keyIndicesToRemoveIterator.getNextRowSequenceWithLength(CHUNK_SIZE));
}
}
}
private void doRemovesForChunk(@NotNull final RowSequence keyIndicesToRemoveChunk) {
incrementalStateManager.remove(pc, keyIndicesToRemoveChunk, reinterpretedKeySources, slots);
propagateRemovesToOperators(keyIndicesToRemoveChunk, slots);
}
private void propagateRemovesToOperators(@NotNull final RowSequence keyIndicesToRemoveChunk,
@NotNull final WritableIntChunk<RowKeys> slotsToRemoveFrom) {
final boolean permute = findSlotRuns(sortKernelContext, hashedRunContext, runStarts, runLengths,
chunkPositions, slotsToRemoveFrom,
ac.requiresRunFinds(SKIP_RUN_FIND));
if (ac.requiresIndices()) {
if (permute) {
final LongChunk<OrderedRowKeys> keyIndices = keyIndicesToRemoveChunk.asRowKeyChunk();
permutedKeyIndices.setSize(keyIndices.size());
LongPermuteKernel.permuteInput(keyIndices, chunkPositions, permutedKeyIndices);
} else {
keyIndicesToRemoveChunk.fillRowKeyChunk(permutedKeyIndices);
}
}
boolean anyOperatorModified = false;
boolean firstOperator = true;
setFalse(modifiedSlots, runStarts.size());
sharedContext.reset();
for (int oi = 0; oi < ac.size(); ++oi) {
if (!firstOperator) {
setFalse(slotsModifiedByOperator, runStarts.size());
}
final int inputSlot = ac.inputSlot(oi);
if (oi == inputSlot) {
if (permute) {
valueChunks[oi] =
getAndPermuteChunk(ac.inputColumns[oi], getContexts[oi], keyIndicesToRemoveChunk, true,
permuteKernels[oi], chunkPositions, workingChunks[oi]);
} else {
valueChunks[oi] =
getChunk(ac.inputColumns[oi], getContexts[oi], keyIndicesToRemoveChunk, true);
}
}
try {
ac.operators[oi].removeChunk(bucketedContexts[oi], inputSlot >= 0 ? valueChunks[inputSlot] : null,
permutedKeyIndices, slotsToRemoveFrom, runStarts, runLengths,
firstOperator ? modifiedSlots : slotsModifiedByOperator);
} catch (Exception ex) {
throw new AggregationOperatorException(
"Failed to remove data, inputcolumns=" + Arrays.toString(ac.inputNames[oi]) + ", outputs="
+ ac.operators[oi].getResultColumns().keySet(),
ex);
} catch (Error err) {
err.addSuppressed(new AggregationOperatorException(
"Failed to remove data, inputcolumns=" + Arrays.toString(ac.inputNames[oi]) + ", outputs="
+ ac.operators[oi].getResultColumns().keySet()));
throw err;
}
anyOperatorModified = updateModificationState(modifiedOperators, modifiedSlots, slotsModifiedByOperator,
anyOperatorModified, firstOperator, oi);
firstOperator = false;
}
if (anyOperatorModified) {
modifySlots(modifiedStatesBuilder, runStarts, slotsToRemoveFrom, modifiedSlots);
}
}
private void doInserts(@NotNull final RowSequence keyIndicesToInsert, final boolean addToStateManager) {
if (keyIndicesToInsert.isEmpty()) {
return;
}
try (final RowSequence.Iterator keyIndicesToInsertIterator = keyIndicesToInsert.getRowSequenceIterator()) {
while (keyIndicesToInsertIterator.hasMore()) {
doInsertsForChunk(keyIndicesToInsertIterator.getNextRowSequenceWithLength(CHUNK_SIZE),
addToStateManager);
}
}
}
private void doInsertsForChunk(@NotNull final RowSequence keyIndicesToInsertChunk,
final boolean addToStateManager) {
if (addToStateManager) {
incrementalStateManager.add(bc, keyIndicesToInsertChunk, reinterpretedKeySources, outputPosition,
slots);
} else {
incrementalStateManager.findModifications(pc, keyIndicesToInsertChunk, reinterpretedKeySources, slots);
}
propagateInsertsToOperators(keyIndicesToInsertChunk, slots);
}
private void propagateInsertsToOperators(@NotNull final RowSequence keyIndicesToInsertChunk,
@NotNull final WritableIntChunk<RowKeys> slotsToAddTo) {
ac.ensureCapacity(outputPosition.intValue());
final boolean permute = findSlotRuns(sortKernelContext, hashedRunContext, runStarts, runLengths,
chunkPositions, slotsToAddTo,
ac.requiresRunFinds(SKIP_RUN_FIND));
if (ac.requiresIndices()) {
if (permute) {
final LongChunk<OrderedRowKeys> keyIndices = keyIndicesToInsertChunk.asRowKeyChunk();
permutedKeyIndices.setSize(keyIndices.size());
LongPermuteKernel.permuteInput(keyIndices, chunkPositions, permutedKeyIndices);
} else {
keyIndicesToInsertChunk.fillRowKeyChunk(permutedKeyIndices);
}
}
boolean anyOperatorModified = false;
boolean firstOperator = true;
setFalse(modifiedSlots, runStarts.size());
sharedContext.reset();
for (int oi = 0; oi < ac.size(); ++oi) {
if (!firstOperator) {
setFalse(slotsModifiedByOperator, runStarts.size());
}
final int inputSlot = ac.inputSlot(oi);
if (inputSlot == oi) {
if (permute) {
valueChunks[oi] =
getAndPermuteChunk(ac.inputColumns[oi], getContexts[oi], keyIndicesToInsertChunk, false,
permuteKernels[oi], chunkPositions, workingChunks[oi]);
} else {
valueChunks[oi] =
getChunk(ac.inputColumns[oi], getContexts[oi], keyIndicesToInsertChunk, false);
}
}
try {
ac.operators[oi].addChunk(bucketedContexts[oi], inputSlot >= 0 ? valueChunks[inputSlot] : null,
permutedKeyIndices, slotsToAddTo, runStarts, runLengths,
firstOperator ? modifiedSlots : slotsModifiedByOperator);
} catch (Exception ex) {
throw new AggregationOperatorException(
"Failed to add data, inputcolumns=" + Arrays.toString(ac.inputNames[oi]) + ", outputs="
+ ac.operators[oi].getResultColumns().keySet(),
ex);
} catch (Error err) {
err.addSuppressed(new AggregationOperatorException(
"Failed to add data, inputcolumns=" + Arrays.toString(ac.inputNames[oi]) + ", outputs="
+ ac.operators[oi].getResultColumns().keySet()));
throw err;
}
anyOperatorModified = updateModificationState(modifiedOperators, modifiedSlots, slotsModifiedByOperator,
anyOperatorModified, firstOperator, oi);
firstOperator = false;
}
if (anyOperatorModified) {
modifySlots(modifiedStatesBuilder, runStarts, slotsToAddTo, modifiedSlots);
}
}
private void doShifts(@NotNull final RowSet postShiftIndexToProcess) {
if (postShiftIndexToProcess.isEmpty()) {
return;
}
try (final WritableLongChunk<OrderedRowKeys> preKeyIndices =
WritableLongChunk.makeWritableChunk(probeChunkSize);
final WritableLongChunk<OrderedRowKeys> postKeyIndices =
WritableLongChunk.makeWritableChunk(probeChunkSize)) {
final Runnable applyChunkedShift = () -> doProcessShiftBucketed(preKeyIndices, postKeyIndices);
processUpstreamShifts(upstream, postShiftIndexToProcess, preKeyIndices, postKeyIndices,
applyChunkedShift);
}
}
private void doProcessShiftBucketed(@NotNull final WritableLongChunk<OrderedRowKeys> preKeyIndices,
@NotNull final WritableLongChunk<OrderedRowKeys> postKeyIndices) {
final boolean[] chunkInitialized = new boolean[ac.size()];
final LongChunk<RowKeys> usePreKeys;
final LongChunk<RowKeys> usePostKeys;
try (final RowSequence preShiftChunkKeys =
RowSequenceFactory.wrapRowKeysChunkAsRowSequence(WritableLongChunk.downcast(preKeyIndices));
final RowSequence postShiftChunkKeys =
RowSequenceFactory
.wrapRowKeysChunkAsRowSequence(WritableLongChunk.downcast(postKeyIndices))) {
sharedContext.reset();
postSharedContext.reset();
Arrays.fill(chunkInitialized, false);
incrementalStateManager.findModifications(pc, postShiftChunkKeys, reinterpretedKeySources, slots);
// We must accumulate shifts into runs for the same slot, if we bounce from slot 1 to 2 and back to 1,
// then the polarity checking logic can have us overwrite things because we wouldn't remove all the
// values from a slot at the same time. Suppose you had
// Slot RowKey
// 1 1
// 2 2
// 1 3
// And a shift of {1-3} + 2. We do not want to allow the 1 to shift over the three by removing 1, adding
// 3; then the 3 would shift to 5 by removing 3 and adding 5. When runs are found you would have the
// 1,3 removed and then 3,5 inserted without conflict.
final boolean permute = findSlotRuns(sortKernelContext, hashedRunContext, runStarts, runLengths,
chunkPositions, slots, true);
if (permute) {
permutedKeyIndices.setSize(preKeyIndices.size());
postPermutedKeyIndices.setSize(postKeyIndices.size());
LongPermuteKernel.permuteInput(preKeyIndices, chunkPositions, permutedKeyIndices);
LongPermuteKernel.permuteInput(postKeyIndices, chunkPositions, postPermutedKeyIndices);
usePreKeys = permutedKeyIndices;
usePostKeys = postPermutedKeyIndices;
} else {
usePreKeys = (LongChunk) preKeyIndices;
usePostKeys = (LongChunk) postKeyIndices;
}
boolean anyOperatorModified = false;
boolean firstOperator = true;
setFalse(modifiedSlots, runStarts.size());
for (int oi = 0; oi < ac.size(); ++oi) {
if (!ac.operators[oi].requiresRowKeys()) {
continue;
}
if (!firstOperator) {
setFalse(slotsModifiedByOperator, runStarts.size());
}
final int inputSlot = ac.inputSlot(oi);
if (inputSlot >= 0 && !chunkInitialized[inputSlot]) {
if (permute) {
valueChunks[inputSlot] = getAndPermuteChunk(ac.inputColumns[inputSlot],
getContexts[inputSlot], preShiftChunkKeys, true,
permuteKernels[inputSlot], chunkPositions, workingChunks[inputSlot]);
postValueChunks[inputSlot] = getAndPermuteChunk(ac.inputColumns[inputSlot],
postGetContexts[inputSlot], postShiftChunkKeys,
false, permuteKernels[inputSlot], chunkPositions, postWorkingChunks[inputSlot]);
} else {
valueChunks[inputSlot] = getChunk(ac.inputColumns[inputSlot], getContexts[inputSlot],
preShiftChunkKeys, true);
postValueChunks[inputSlot] = getChunk(ac.inputColumns[inputSlot],
postGetContexts[inputSlot], postShiftChunkKeys, false);
}
chunkInitialized[inputSlot] = true;
}
try {
ac.operators[oi].shiftChunk(bucketedContexts[oi],
inputSlot >= 0 ? valueChunks[inputSlot] : null,
inputSlot >= 0 ? postValueChunks[inputSlot] : null, usePreKeys,
usePostKeys, slots, runStarts, runLengths,
firstOperator ? modifiedSlots : slotsModifiedByOperator);
} catch (Exception ex) {
throw new AggregationOperatorException(
"Failed to shift data, inputcolumns=" + Arrays.toString(ac.inputNames[oi])
+ ", outputs=" + ac.operators[oi].getResultColumns().keySet(),
ex);
} catch (Error err) {
err.addSuppressed(new AggregationOperatorException(
"Failed to shift data, inputcolumns=" + Arrays.toString(ac.inputNames[oi])
+ ", outputs=" + ac.operators[oi].getResultColumns().keySet()));
throw err;
}
anyOperatorModified = updateModificationState(modifiedOperators, modifiedSlots,
slotsModifiedByOperator, anyOperatorModified, firstOperator, oi);
firstOperator = false;
}
if (anyOperatorModified) {
modifySlots(modifiedStatesBuilder, runStarts, slots, modifiedSlots);
}
}
}
private void doSameSlotModifies(@NotNull final RowSequence preShiftKeyIndicesToModify,
@NotNull final RowSequence postShiftKeyIndicesToModify,
final boolean supplyPostIndices, @NotNull final boolean[] operatorsToProcess,
@NotNull final boolean[] operatorsToProcessIndicesOnly) {
final boolean shifted = preShiftKeyIndicesToModify != postShiftKeyIndicesToModify;
try (final RowSequence.Iterator preShiftIterator = preShiftKeyIndicesToModify.getRowSequenceIterator();
final RowSequence.Iterator postShiftIterator =
shifted ? postShiftKeyIndicesToModify.getRowSequenceIterator() : null) {
final boolean[] chunkInitialized = new boolean[ac.size()];
while (preShiftIterator.hasMore()) {
final RowSequence preShiftKeyIndicesChunk =
preShiftIterator.getNextRowSequenceWithLength(CHUNK_SIZE);
final RowSequence postShiftKeyIndicesChunk =
shifted ? postShiftIterator.getNextRowSequenceWithLength(CHUNK_SIZE)
: preShiftKeyIndicesChunk;
sharedContext.reset();
postSharedContext.reset();
Arrays.fill(chunkInitialized, false);
incrementalStateManager.findModifications(pc, postShiftKeyIndicesChunk, reinterpretedKeySources,
slots);
final boolean permute = findSlotRuns(sortKernelContext, hashedRunContext, runStarts, runLengths,
chunkPositions, slots,
ac.requiresRunFinds(SKIP_RUN_FIND));
if (supplyPostIndices) {
if (permute) {
final LongChunk<OrderedRowKeys> postKeyIndices =
postShiftKeyIndicesChunk.asRowKeyChunk();
permutedKeyIndices.setSize(postKeyIndices.size());
LongPermuteKernel.permuteInput(postKeyIndices, chunkPositions, permutedKeyIndices);
} else {
postShiftKeyIndicesChunk.fillRowKeyChunk(permutedKeyIndices);
}
}
boolean anyOperatorModified = false;
boolean firstOperator = true;
setFalse(modifiedSlots, runStarts.size());
for (int oi = 0; oi < ac.size(); ++oi) {
if (!operatorsToProcessIndicesOnly[oi] && !operatorsToProcess[oi]) {
continue;
}
if (!firstOperator) {
setFalse(slotsModifiedByOperator, runStarts.size());
}
if (operatorsToProcessIndicesOnly[oi]) {
try {
ac.operators[oi].modifyRowKeys(bucketedContexts[oi], permutedKeyIndices, slots,
runStarts,
runLengths, firstOperator ? modifiedSlots : slotsModifiedByOperator);
} catch (Exception ex) {
throw new AggregationOperatorException(
"Failed to modify data, inputcolumns=" + Arrays.toString(ac.inputNames[oi])
+ ", outputs=" + ac.operators[oi].getResultColumns().keySet(),
ex);
} catch (Error err) {
err.addSuppressed(new AggregationOperatorException(
"Failed to modify data, inputcolumns=" + Arrays.toString(ac.inputNames[oi])
+ ", outputs=" + ac.operators[oi].getResultColumns().keySet()));
throw err;
}
} else /* operatorsToProcess[oi] */ {
final int inputSlot = ac.inputSlot(oi);
if (inputSlot >= 0 && !chunkInitialized[inputSlot]) {
if (permute) {
valueChunks[inputSlot] = getAndPermuteChunk(ac.inputColumns[inputSlot],
getContexts[inputSlot],
preShiftKeyIndicesChunk, true, permuteKernels[inputSlot], chunkPositions,
workingChunks[inputSlot]);
postValueChunks[inputSlot] =
getAndPermuteChunk(ac.inputColumns[inputSlot], postGetContexts[inputSlot],
postShiftKeyIndicesChunk, false, permuteKernels[inputSlot],
chunkPositions,
postWorkingChunks[inputSlot]);
} else {
valueChunks[inputSlot] =
getChunk(ac.inputColumns[inputSlot], getContexts[inputSlot],
preShiftKeyIndicesChunk, true);
postValueChunks[inputSlot] =
getChunk(ac.inputColumns[inputSlot], postGetContexts[inputSlot],
postShiftKeyIndicesChunk, false);
}
chunkInitialized[inputSlot] = true;
}
try {
ac.operators[oi].modifyChunk(bucketedContexts[oi],
inputSlot >= 0 ? valueChunks[inputSlot] : null,
inputSlot >= 0 ? postValueChunks[inputSlot] : null, permutedKeyIndices, slots,
runStarts, runLengths, firstOperator ? modifiedSlots : slotsModifiedByOperator);
} catch (Exception ex) {
throw new AggregationOperatorException(
"Failed to modify data, inputcolumns=" + Arrays.toString(ac.inputNames[oi])
+ ", outputs=" + ac.operators[oi].getResultColumns().keySet(),
ex);
} catch (Error er) {
er.addSuppressed(new AggregationOperatorException(
"Failed to modify data, inputcolumns=" + Arrays.toString(ac.inputNames[oi])
+ ", outputs=" + ac.operators[oi].getResultColumns().keySet()));
throw er;
}
}
anyOperatorModified = updateModificationState(modifiedOperators, modifiedSlots,
slotsModifiedByOperator, anyOperatorModified, firstOperator, oi);
firstOperator = false;
}
if (anyOperatorModified) {
modifySlots(modifiedStatesBuilder, runStarts, slots, modifiedSlots);
}
}
}
}
private void doSameSlotModifyIndicesOnly(@NotNull final RowSequence postShiftKeyIndicesToModify,
@NotNull final boolean[] operatorsToProcessIndicesOnly) {
try (final RowSequence.Iterator postShiftIterator = postShiftKeyIndicesToModify.getRowSequenceIterator()) {
while (postShiftIterator.hasMore()) {
final RowSequence postShiftKeyIndicesChunk =
postShiftIterator.getNextRowSequenceWithLength(CHUNK_SIZE);
incrementalStateManager.findModifications(pc, postShiftKeyIndicesChunk, reinterpretedKeySources,
slots);
final boolean permute = findSlotRuns(sortKernelContext, hashedRunContext, runStarts, runLengths,
chunkPositions, slots,
ac.requiresRunFinds(SKIP_RUN_FIND));
if (permute) {
final LongChunk<OrderedRowKeys> postKeyIndices = postShiftKeyIndicesChunk.asRowKeyChunk();
permutedKeyIndices.setSize(postKeyIndices.size());
LongPermuteKernel.permuteInput(postKeyIndices, chunkPositions, permutedKeyIndices);
} else {
postShiftKeyIndicesChunk.fillRowKeyChunk(permutedKeyIndices);
}
boolean anyOperatorModified = false;
boolean firstOperator = true;
setFalse(modifiedSlots, runStarts.size());
for (int oi = 0; oi < ac.size(); ++oi) {
if (!operatorsToProcessIndicesOnly[oi]) {
continue;
}
if (!firstOperator) {
setFalse(slotsModifiedByOperator, runStarts.size());
}
try {
ac.operators[oi].modifyRowKeys(bucketedContexts[oi], permutedKeyIndices, slots, runStarts,
runLengths, firstOperator ? modifiedSlots : slotsModifiedByOperator);
} catch (Exception ex) {
throw new AggregationOperatorException(
"Failed to modify data, inputcolumns=" + Arrays.toString(ac.inputNames[oi])
+ ", outputs=" + ac.operators[oi].getResultColumns().keySet(),
ex);
} catch (Error err) {
err.addSuppressed(new AggregationOperatorException(
"Failed to modify data, inputcolumns=" + Arrays.toString(ac.inputNames[oi])
+ ", outputs=" + ac.operators[oi].getResultColumns().keySet()));
throw err;
}
anyOperatorModified = updateModificationState(modifiedOperators, modifiedSlots,
slotsModifiedByOperator, anyOperatorModified, firstOperator, oi);
firstOperator = false;
}
if (anyOperatorModified) {
modifySlots(modifiedStatesBuilder, runStarts, slots, modifiedSlots);
}
}
}
}
private static class ModifySplitResult implements SafeCloseable {
/**
* This is a partition of same-slot modifies for row keys that were not shifted. Needed for modifyChunk of
* input-modified operators that require indices, since they handle the shifted same-slot modifies in
* shiftChunk.
*/
@Nullable
private final RowSet unshiftedSameSlotIndices;
/**
* This is all of the same-slot modifies, with row keys in pre-shift space. Needed for modifyChunk of
* input-modified operators that don't require indices.
*/
@Nullable
private final RowSet sameSlotIndicesPreShift;
/**
* This is all of the same-slot modifies, with row keys in post-shift space. Needed for modifyChunk of
* input-modified operators that don't require indices, and for modifyRowKeys of operators that require
* indices but don't have any inputs modified.
*/
@Nullable
private final RowSet sameSlotIndicesPostShift;
/**
* This is all of the key-change modifies, with row keys in post-shift space. Needed for addChunk to process key
* changes for all operators.
*/
@NotNull
private final RowSet keyChangeIndicesPostShift;
private ModifySplitResult(@Nullable final RowSet unshiftedSameSlotIndices,
@Nullable final RowSet sameSlotIndicesPreShift,
@Nullable final RowSet sameSlotIndicesPostShift,
@NotNull final RowSet keyChangeIndicesPostShift) {
this.unshiftedSameSlotIndices = unshiftedSameSlotIndices;
this.sameSlotIndicesPreShift = sameSlotIndicesPreShift;
this.sameSlotIndicesPostShift = sameSlotIndicesPostShift;
this.keyChangeIndicesPostShift = keyChangeIndicesPostShift;
}
@Override
public final void close() {
if (unshiftedSameSlotIndices != null) {
unshiftedSameSlotIndices.close();
}
if (sameSlotIndicesPreShift != null) {
sameSlotIndicesPreShift.close();
}
if (sameSlotIndicesPostShift != null) {
sameSlotIndicesPostShift.close();
}
keyChangeIndicesPostShift.close();
}
}
private ModifySplitResult splitKeyModificationsAndDoKeyChangeRemoves() {
Require.requirement(keysModified, "keysModified");
final boolean needUnshiftedSameSlotIndices = processShifts;
final boolean needSameSlotIndicesPreShift = !processShifts && od.anyOperatorHasModifiedInputColumns;
final boolean needSameSlotIndicesPostShift = !processShifts && (od.anyOperatorHasModifiedInputColumns
|| od.anyOperatorWithoutModifiedInputColumnsRequiresIndices || keysModified);
final RowSetBuilderSequential unshiftedSameSlotIndicesBuilder =
needUnshiftedSameSlotIndices ? RowSetFactory.builderSequential() : null;
final RowSetBuilderSequential sameSlotIndicesPreShiftBuilder =
needSameSlotIndicesPreShift ? RowSetFactory.builderSequential() : null;
final RowSetBuilderSequential sameSlotIndicesPostShiftBuilder =
needSameSlotIndicesPostShift ? RowSetFactory.builderSequential() : null;
final RowSetBuilderSequential keyChangeIndicesPostShiftBuilder =
RowSetFactory.builderSequential();
try (final RowSequence.Iterator modifiedPreShiftIterator =
upstream.getModifiedPreShift().getRowSequenceIterator();
final RowSequence.Iterator modifiedPostShiftIterator =
shifted ? upstream.modified().getRowSequenceIterator() : null;
final WritableIntChunk<RowKeys> postSlots = WritableIntChunk.makeWritableChunk(buildChunkSize)) {
// Hijacking postPermutedKeyIndices because it's not used in this loop; the rename hopefully makes the
// code much clearer!
final WritableLongChunk<RowKeys> removedKeyIndices = postPermutedKeyIndices;
while (modifiedPreShiftIterator.hasMore()) {
final RowSequence modifiedPreShiftChunk =
modifiedPreShiftIterator.getNextRowSequenceWithLength(CHUNK_SIZE);
final RowSequence modifiedPostShiftChunk =
shifted ? modifiedPostShiftIterator.getNextRowSequenceWithLength(CHUNK_SIZE)
: modifiedPreShiftChunk;
incrementalStateManager.remove(pc, modifiedPreShiftChunk, reinterpretedKeySources, slots);
incrementalStateManager.add(bc, modifiedPostShiftChunk, reinterpretedKeySources, outputPosition,
postSlots);
final LongChunk<OrderedRowKeys> preShiftIndices = modifiedPreShiftChunk.asRowKeyChunk();
final LongChunk<OrderedRowKeys> postShiftIndices =
shifted ? modifiedPostShiftChunk.asRowKeyChunk() : preShiftIndices;
final int chunkSize = slots.size();
int numKeyChanges = 0;
for (int si = 0; si < chunkSize; ++si) {
final int previousSlot = slots.get(si);
final int currentSlot = postSlots.get(si);
final long previousIndex = preShiftIndices.get(si);
final long currentIndex = postShiftIndices.get(si);
if (previousSlot == currentSlot) {
if (previousIndex == currentIndex && needUnshiftedSameSlotIndices) {
unshiftedSameSlotIndicesBuilder.appendKey(currentIndex);
}
if (needSameSlotIndicesPreShift) {
sameSlotIndicesPreShiftBuilder.appendKey(previousIndex);
}
if (needSameSlotIndicesPostShift) {
sameSlotIndicesPostShiftBuilder.appendKey(currentIndex);
}
} else {
slots.set(numKeyChanges, previousSlot);
removedKeyIndices.set(numKeyChanges++, previousIndex);
keyChangeIndicesPostShiftBuilder.appendKey(currentIndex);
}
}
if (numKeyChanges > 0) {
slots.setSize(numKeyChanges);
removedKeyIndices.setSize(numKeyChanges);
try (final RowSequence keyIndicesToRemoveChunk = RowSequenceFactory
.wrapRowKeysChunkAsRowSequence(LongChunk.downcast(removedKeyIndices))) {
propagateRemovesToOperators(keyIndicesToRemoveChunk, slots);
}
}
}
}
return new ModifySplitResult(
needUnshiftedSameSlotIndices ? unshiftedSameSlotIndicesBuilder.build() : null,
needSameSlotIndicesPreShift ? sameSlotIndicesPreShiftBuilder.build() : null,
needSameSlotIndicesPostShift ? sameSlotIndicesPostShiftBuilder.build() : null,
keyChangeIndicesPostShiftBuilder.build());
}
private RowSet getUnshiftedModifies() {
Require.requirement(!keysModified, "!keysModified");
Require.requirement(shifted, "shifted");
return extractUnshiftedModifiesFromUpstream(upstream);
}
private static boolean updateModificationState(@NotNull final boolean[] modifiedOperators,
@NotNull final WritableBooleanChunk<Values> modifiedSlots,
@NotNull final BooleanChunk<Values> slotsModifiedByOperator, boolean operatorModified,
final boolean firstOperator, final int operatorIndex) {
final boolean chunkModifiedSlots;
if (firstOperator) {
chunkModifiedSlots = anyTrue(modifiedSlots);
operatorModified = chunkModifiedSlots;
} else {
chunkModifiedSlots = orInto(slotsModifiedByOperator, modifiedSlots);
operatorModified |= chunkModifiedSlots;
}
modifiedOperators[operatorIndex] |= chunkModifiedSlots;
return operatorModified;
}
}
private static RowSet extractUnshiftedModifiesFromUpstream(@NotNull final TableUpdate upstream) {
final RowSetBuilderSequential unshiftedModifiesBuilder = RowSetFactory.builderSequential();
try (final RowSequence.Iterator modifiedPreShiftIterator =
upstream.getModifiedPreShift().getRowSequenceIterator();
final RowSequence.Iterator modifiedPostShiftIterator = upstream.modified().getRowSequenceIterator()) {
while (modifiedPreShiftIterator.hasMore()) {
final RowSequence modifiedPreShiftChunk =
modifiedPreShiftIterator.getNextRowSequenceWithLength(CHUNK_SIZE);
final RowSequence modifiedPostShiftChunk =
modifiedPostShiftIterator.getNextRowSequenceWithLength(CHUNK_SIZE);
final LongChunk<OrderedRowKeys> preShiftIndices = modifiedPreShiftChunk.asRowKeyChunk();
final LongChunk<OrderedRowKeys> postShiftIndices = modifiedPostShiftChunk.asRowKeyChunk();
final int chunkSize = preShiftIndices.size();
for (int ki = 0; ki < chunkSize; ++ki) {
final long previousIndex = preShiftIndices.get(ki);
final long currentIndex = postShiftIndices.get(ki);
if (previousIndex == currentIndex) {
unshiftedModifiesBuilder.appendKey(currentIndex);
}
}
}
}
return unshiftedModifiesBuilder.build();
}
private static void extractDownstreamModifiedColumnSet(@NotNull final TableUpdateImpl downstream,
@NotNull final ModifiedColumnSet resultModifiedColumnSet,
@NotNull final boolean[] modifiedOperators,
@NotNull final ModifiedColumnSet updateUpstreamModifiedColumnSet,
@NotNull final UnaryOperator<ModifiedColumnSet>[] resultModifiedColumnSetFactories) {
if (downstream.modified().isNonempty()) {
downstream.modifiedColumnSet = resultModifiedColumnSet;
downstream.modifiedColumnSet().clear();
for (int oi = 0; oi < modifiedOperators.length; ++oi) {
if (modifiedOperators[oi]) {
downstream.modifiedColumnSet()
.setAll(resultModifiedColumnSetFactories[oi].apply(updateUpstreamModifiedColumnSet));
}
}
} else {
downstream.modifiedColumnSet = ModifiedColumnSet.EMPTY;
}
if (downstream.modifiedColumnSet().empty() && downstream.modified().isNonempty()) {
downstream.modified().close();
downstream.modified = RowSetFactory.empty();
}
}
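// Illustrative example (hypothetical columns): if only the sum operator's states changed this cycle and
// the upstream dirty set contained its input column "Value", that operator's factory translates the
// dirty set into the result's "SumValue" column; operators whose states did not change contribute
// nothing to the downstream modified column set.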
private static class OperatorDivision {
private final boolean anyOperatorHasModifiedInputColumns;
private final boolean[] operatorsWithModifiedInputColumns;
private final boolean anyOperatorWithModifiedInputColumnsIgnoresIndices;
private final boolean[] operatorsWithModifiedInputColumnsThatIgnoreIndices;
private final boolean anyOperatorWithoutModifiedInputColumnsRequiresIndices;
private final boolean[] operatorsWithoutModifiedInputColumnsThatRequireIndices;
private final boolean[] operatorsThatRequireIndices;
private OperatorDivision(@NotNull final AggregationContext ac,
final boolean upstreamModified,
@NotNull final ModifiedColumnSet updateUpstreamModifiedColumnSet,
@NotNull final ModifiedColumnSet[] operatorInputUpstreamModifiedColumnSets) {
operatorsThatRequireIndices = new boolean[ac.size()];
for (int oi = 0; oi < ac.size(); ++oi) {
operatorsThatRequireIndices[oi] = ac.operators[oi].requiresRowKeys();
}
operatorsWithModifiedInputColumns = new boolean[ac.size()];
operatorsWithModifiedInputColumnsThatIgnoreIndices = new boolean[ac.size()];
operatorsWithoutModifiedInputColumnsThatRequireIndices = new boolean[ac.size()];
boolean anyOperatorHasModifiedInputColumnsTemp = false;
boolean anyOperatorWithModifiedInputColumnsIgnoresIndicesTemp = false;
boolean anyOperatorWithoutModifiedInputColumnsRequiresIndicesTemp = false;
if (upstreamModified) {
for (int oi = 0; oi < ac.size(); ++oi) {
if (updateUpstreamModifiedColumnSet.containsAny(operatorInputUpstreamModifiedColumnSets[oi])) {
operatorsWithModifiedInputColumns[oi] = true;
anyOperatorHasModifiedInputColumnsTemp = true;
if (!ac.operators[oi].requiresRowKeys()) {
operatorsWithModifiedInputColumnsThatIgnoreIndices[oi] = true;
anyOperatorWithModifiedInputColumnsIgnoresIndicesTemp = true;
}
} else if (ac.operators[oi].requiresRowKeys()) {
operatorsWithoutModifiedInputColumnsThatRequireIndices[oi] = true;
anyOperatorWithoutModifiedInputColumnsRequiresIndicesTemp = true;
}
}
}
anyOperatorHasModifiedInputColumns = anyOperatorHasModifiedInputColumnsTemp;
anyOperatorWithModifiedInputColumnsIgnoresIndices = anyOperatorWithModifiedInputColumnsIgnoresIndicesTemp;
anyOperatorWithoutModifiedInputColumnsRequiresIndices =
anyOperatorWithoutModifiedInputColumnsRequiresIndicesTemp;
}
}
private static void processUpstreamShifts(TableUpdate upstream, RowSet useIndex,
WritableLongChunk<OrderedRowKeys> preKeyIndices, WritableLongChunk<OrderedRowKeys> postKeyIndices,
Runnable applyChunkedShift) {
RowSet.SearchIterator postOkForward = null;
RowSet.SearchIterator postOkReverse = null;
boolean lastPolarityReversed = false; // the initial value doesn't matter, because we'll just have a noop apply
// in the worst case
int writePosition = resetWritePosition(lastPolarityReversed, preKeyIndices, postKeyIndices);
final RowSetShiftData.Iterator shiftIt = upstream.shifted().applyIterator();
while (shiftIt.hasNext()) {
shiftIt.next();
final boolean polarityReversed = shiftIt.polarityReversed();
if (polarityReversed != lastPolarityReversed) {
// if our polarity changed, we must flush out the shifts that are pending
maybeApplyChunkedShift(applyChunkedShift, preKeyIndices, postKeyIndices, lastPolarityReversed,
writePosition);
writePosition = resetWritePosition(polarityReversed, preKeyIndices, postKeyIndices);
}
final long delta = shiftIt.shiftDelta();
final long endRange = shiftIt.endRange() + delta;
final long beginRange = shiftIt.beginRange() + delta;
if (polarityReversed) {
// we must apply these shifts reversed
if (postOkReverse == null) {
postOkReverse = useIndex.reverseIterator();
}
if (postOkReverse.advance(endRange)) {
long idx;
while ((idx = postOkReverse.currentValue()) >= beginRange) {
postKeyIndices.set(--writePosition, idx);
preKeyIndices.set(writePosition, idx - delta);
if (writePosition == 0) {
// once we fill a chunk, we must process the shifts
maybeApplyChunkedShift(applyChunkedShift, preKeyIndices, postKeyIndices, polarityReversed,
writePosition);
writePosition = resetWritePosition(polarityReversed, preKeyIndices, postKeyIndices);
}
if (postOkReverse.hasNext()) {
postOkReverse.nextLong();
} else {
break;
}
}
}
} else {
if (postOkReverse != null) {
postOkReverse.close();
postOkReverse = null;
}
if (postOkForward == null) {
postOkForward = useIndex.searchIterator();
}
// we can apply these in a forward direction as normal, we just need to accumulate into our key chunks
if (postOkForward.advance(beginRange)) {
long idx;
while ((idx = postOkForward.currentValue()) <= endRange) {
postKeyIndices.add(idx);
preKeyIndices.add(idx - delta);
if (postKeyIndices.size() == postKeyIndices.capacity()) {
// once we fill a chunk, we must process the shifts
maybeApplyChunkedShift(applyChunkedShift, preKeyIndices, postKeyIndices, polarityReversed,
writePosition);
writePosition = resetWritePosition(polarityReversed, preKeyIndices, postKeyIndices);
}
if (postOkForward.hasNext()) {
postOkForward.nextLong();
} else {
break;
}
}
}
}
lastPolarityReversed = polarityReversed;
}
// after we are done, we should process the shift
maybeApplyChunkedShift(applyChunkedShift, preKeyIndices, postKeyIndices, lastPolarityReversed, writePosition);
// close our iterators
if (postOkReverse != null) {
postOkReverse.close();
}
if (postOkForward != null) {
postOkForward.close();
}
}
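// Illustrative walk-through of processUpstreamShifts (hypothetical values): for a forward shift of
// rows {10-12} by +5 with useIndex containing {15, 16}, the forward iterator appends post-shift keys
// 15 and 16 and pre-shift keys 10 and 11, then runs applyChunkedShift when the chunk fills or the next
// shift's polarity differs. Reversed polarity fills the chunks from the end (writePosition counts
// down), so keys still appear in ascending order within the chunk when the shift must be applied
// back-to-front.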
private static int resetWritePosition(boolean polarityReversed, WritableLongChunk<OrderedRowKeys> preKeyIndices,
WritableLongChunk<OrderedRowKeys> postKeyIndices) {
if (polarityReversed) {
postKeyIndices.setSize(postKeyIndices.capacity());
if (preKeyIndices != null) {
preKeyIndices.setSize(postKeyIndices.capacity());
}
return postKeyIndices.capacity();
} else {
postKeyIndices.setSize(0);
if (preKeyIndices != null) {
preKeyIndices.setSize(0);
}
return 0;
}
}
private static void maybeApplyChunkedShift(Runnable applyChunkedShift,
WritableLongChunk<OrderedRowKeys> preKeyIndices, WritableLongChunk<OrderedRowKeys> postKeyIndices,
boolean polarityReversed, int writePosition) {
if (polarityReversed) {
int chunkSize = postKeyIndices.capacity();
if (writePosition == chunkSize) {
return;
}
if (writePosition > 0) {
postKeyIndices.copyFromTypedChunk(postKeyIndices, writePosition, 0, chunkSize - writePosition);
postKeyIndices.setSize(chunkSize - writePosition);
if (preKeyIndices != null) {
preKeyIndices.copyFromTypedChunk(preKeyIndices, writePosition, 0, chunkSize - writePosition);
preKeyIndices.setSize(chunkSize - writePosition);
}
}
} else {
if (postKeyIndices.size() == 0) {
return;
}
}
applyChunkedShift.run();
}
private static void setFalse(WritableBooleanChunk<Values> modifiedSlots, int size) {
modifiedSlots.fillWithValue(0, size, false);
modifiedSlots.setSize(size);
}
private static boolean orInto(BooleanChunk<Values> operatorSlots, WritableBooleanChunk<Values> modifiedSlots) {
boolean anyTrue = false;
for (int ii = 0; ii < operatorSlots.size(); ++ii) {
if (operatorSlots.get(ii)) {
anyTrue = true;
modifiedSlots.set(ii, true);
}
}
return anyTrue;
}
private static boolean anyTrue(BooleanChunk<Values> operatorSlots) {
for (int ii = 0; ii < operatorSlots.size(); ++ii) {
if (operatorSlots.get(ii)) {
return true;
}
}
return false;
}
/**
* @return true if we must permute the inputs
*/
private static boolean findSlotRuns(
IntIntTimsortKernel.IntIntSortKernelContext<RowKeys, ChunkPositions> sortKernelContext,
HashedRunFinder.HashedRunContext hashedRunContext,
WritableIntChunk<ChunkPositions> runStarts, WritableIntChunk<ChunkLengths> runLengths,
WritableIntChunk<ChunkPositions> chunkPosition, WritableIntChunk<RowKeys> slots,
boolean findRuns) {
if (!findRuns) {
chunkPosition.setSize(slots.size());
ChunkUtils.fillInOrder(chunkPosition);
IntFindRunsKernel.findRunsSingles(slots, runStarts, runLengths);
return false;
} else if (HASHED_RUN_FIND) {
return HashedRunFinder.findRunsHashed(hashedRunContext, runStarts, runLengths, chunkPosition, slots);
} else {
chunkPosition.setSize(slots.size());
ChunkUtils.fillInOrder(chunkPosition);
IntIntTimsortKernel.sort(sortKernelContext, chunkPosition, slots);
IntFindRunsKernel.findRunsSingles(slots, runStarts, runLengths);
return true;
}
}
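// Worked example for the sort-based path (hypothetical input): slots = [7, 3, 7, 3] starts from
// chunkPosition = [0, 1, 2, 3]; the stable sort leaves slots = [3, 3, 7, 7] with
// chunkPosition = [1, 3, 0, 2], and findRunsSingles then reports runStarts = [0, 2],
// runLengths = [2, 2], returning true so the caller permutes its value chunks into the same grouped
// order. The hashed path produces an equivalent grouping without a full sort, though its runs may
// appear in a different order.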
/**
* Get values from the inputColumn, and permute them into workingChunk.
*/
private static Chunk<? extends Values> getAndPermuteChunk(ChunkSource.WithPrev<Values> inputColumn,
ChunkSource.GetContext getContext,
RowSequence chunkOk, boolean usePrev, PermuteKernel permuteKernel, IntChunk<ChunkPositions> chunkPosition,
WritableChunk<Values> workingChunk) {
final Chunk<? extends Values> values = getChunk(inputColumn, getContext, chunkOk, usePrev);
// permute the chunk based on the chunkPosition, so that we have values from a slot together
if (values != null) {
workingChunk.setSize(values.size());
permuteKernel.permuteInput(values, chunkPosition, workingChunk);
}
return workingChunk;
}
@Nullable
private static Chunk<? extends Values> getChunk(ChunkSource.WithPrev<Values> inputColumn,
ChunkSource.GetContext getContext, RowSequence chunkOk, boolean usePrev) {
final Chunk<? extends Values> values;
if (inputColumn == null) {
values = null;
} else if (usePrev) {
values = inputColumn.getPrevChunk(getContext, chunkOk);
} else {
values = inputColumn.getChunk(getContext, chunkOk);
}
return values;
}
private static void modifySlots(RowSetBuilderRandom modifiedBuilder, IntChunk<ChunkPositions> runStarts,
WritableIntChunk<RowKeys> slots, BooleanChunk<Values> modified) {
int outIndex = 0;
for (int runIndex = 0; runIndex < runStarts.size(); ++runIndex) {
if (modified.get(runIndex)) {
final int slotStart = runStarts.get(runIndex);
final int slot = slots.get(slotStart);
slots.set(outIndex++, slot);
}
}
slots.setSize(outIndex);
modifiedBuilder.addRowKeysChunk(slots);
}
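// Worked example for modifySlots (hypothetical input): with runStarts = [0, 2, 5],
// slots = [4, 4, 9, 9, 9, 2] and modified = [true, false, true], the loop keeps one representative
// slot per modified run, compacting slots to [4, 2] before handing them to the builder as modified
// output row positions.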
@NotNull
private static QueryTable staticGroupedAggregation(QueryTable withView, String keyName, ColumnSource<?> keySource,
AggregationContext ac) {
final Pair<ArrayBackedColumnSource, ObjectArraySource<WritableRowSet>> groupKeyIndexTable;
final Map