
// io.prestosql.operator.aggregation.builder.InMemoryHashAggregationBuilder (Maven / Gradle / Ivy)
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.prestosql.operator.aggregation.builder;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Ints;
import com.google.common.util.concurrent.ListenableFuture;
import io.airlift.units.DataSize;
import io.prestosql.array.IntBigArray;
import io.prestosql.operator.GroupByHash;
import io.prestosql.operator.GroupByIdBlock;
import io.prestosql.operator.HashCollisionsCounter;
import io.prestosql.operator.OperatorContext;
import io.prestosql.operator.TransformWork;
import io.prestosql.operator.UpdateMemory;
import io.prestosql.operator.Work;
import io.prestosql.operator.WorkProcessor;
import io.prestosql.operator.WorkProcessor.ProcessState;
import io.prestosql.operator.aggregation.AccumulatorFactory;
import io.prestosql.operator.aggregation.GroupedAccumulator;
import io.prestosql.spi.Page;
import io.prestosql.spi.PageBuilder;
import io.prestosql.spi.block.BlockBuilder;
import io.prestosql.spi.type.Type;
import io.prestosql.sql.gen.JoinCompiler;
import io.prestosql.sql.planner.plan.AggregationNode;
import io.prestosql.sql.planner.plan.AggregationNode.Step;
import io.prestosql.type.BlockTypeOperators;
import it.unimi.dsi.fastutil.ints.AbstractIntIterator;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntIterators;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.OptionalLong;
import static com.google.common.base.Preconditions.checkArgument;
import static io.prestosql.SystemSessionProperties.isDictionaryAggregationEnabled;
import static io.prestosql.operator.GroupByHash.createGroupByHash;
import static io.prestosql.spi.type.BigintType.BIGINT;
import static java.util.Objects.requireNonNull;
public class InMemoryHashAggregationBuilder
implements HashAggregationBuilder
{
// Maps grouping keys to group ids; all group-related queries delegate to it.
private final GroupByHash groupByHash;
// One aggregator per accumulator factory passed to the constructor, in the same order.
private final List<Aggregator> aggregators;
// Whether the aggregation step given to the constructor produces partial output.
private final boolean partial;
// Memory limit in bytes applied to partial aggregation; empty when no limit was supplied.
private final OptionalLong maxPartialMemory;
// Callback invoked by updateMemory().
private final UpdateMemory updateMemory;
// Result of the most recent memory-limit check performed by getSizeInMemory().
private boolean full;
/**
 * Creates a builder without an intermediate-channel override: delegates to the full
 * constructor with an empty {@code overwriteIntermediateChannelOffset}.
 *
 * @param accumulatorFactories one factory per aggregation function
 * @param step aggregation step being executed
 * @param expectedGroups expected number of groups, forwarded to the group-by hash
 * @param groupByTypes types of the grouping channels
 * @param groupByChannels input channel indexes of the grouping columns
 * @param hashChannel optional input channel forwarded to the group-by hash
 * @param operatorContext source of the session used to configure the group-by hash
 * @param maxPartialMemory optional memory limit used by the "is full" check
 * @param joinCompiler forwarded to the group-by hash
 * @param blockTypeOperators forwarded to the group-by hash
 * @param updateMemory memory-update callback
 */
public InMemoryHashAggregationBuilder(
        List<AccumulatorFactory> accumulatorFactories,
        Step step,
        int expectedGroups,
        List<Type> groupByTypes,
        List<Integer> groupByChannels,
        Optional<Integer> hashChannel,
        OperatorContext operatorContext,
        Optional<DataSize> maxPartialMemory,
        JoinCompiler joinCompiler,
        BlockTypeOperators blockTypeOperators,
        UpdateMemory updateMemory)
{
    this(accumulatorFactories,
            step,
            expectedGroups,
            groupByTypes,
            groupByChannels,
            hashChannel,
            operatorContext,
            maxPartialMemory,
            Optional.empty(),
            joinCompiler,
            blockTypeOperators,
            updateMemory);
}
/**
 * Creates a builder, optionally overriding the channel from which each aggregator reads
 * its intermediate state.
 *
 * @param accumulatorFactories one factory per aggregation function; must not be null
 * @param step aggregation step being executed
 * @param expectedGroups expected number of groups, forwarded to the group-by hash
 * @param groupByTypes types of the grouping channels
 * @param groupByChannels input channel indexes of the grouping columns
 * @param hashChannel optional input channel forwarded to the group-by hash
 * @param operatorContext source of the session used to configure the group-by hash
 * @param maxPartialMemory optional memory limit used by the "is full" check
 * @param overwriteIntermediateChannelOffset when present, aggregator {@code i} is given
 *        intermediate channel {@code offset + i}
 * @param joinCompiler forwarded to the group-by hash
 * @param blockTypeOperators forwarded to the group-by hash
 * @param updateMemory memory-update callback; must not be null
 * @throws NullPointerException if {@code accumulatorFactories} or {@code updateMemory} is null
 */
public InMemoryHashAggregationBuilder(
        List<AccumulatorFactory> accumulatorFactories,
        Step step,
        int expectedGroups,
        List<Type> groupByTypes,
        List<Integer> groupByChannels,
        Optional<Integer> hashChannel,
        OperatorContext operatorContext,
        Optional<DataSize> maxPartialMemory,
        Optional<Integer> overwriteIntermediateChannelOffset,
        JoinCompiler joinCompiler,
        BlockTypeOperators blockTypeOperators,
        UpdateMemory updateMemory)
{
    // Validate before first use: updateMemory is handed to the group-by hash below.
    requireNonNull(accumulatorFactories, "accumulatorFactories is null");
    this.updateMemory = requireNonNull(updateMemory, "updateMemory is null");

    this.groupByHash = createGroupByHash(
            groupByTypes,
            Ints.toArray(groupByChannels),
            hashChannel,
            expectedGroups,
            isDictionaryAggregationEnabled(operatorContext.getSession()),
            joinCompiler,
            blockTypeOperators,
            updateMemory);
    this.partial = step.isOutputPartial();
    this.maxPartialMemory = maxPartialMemory
            .map(dataSize -> OptionalLong.of(dataSize.toBytes()))
            .orElseGet(OptionalLong::empty);

    // wrap each function with an aggregator
    ImmutableList.Builder<Aggregator> builder = ImmutableList.builder();
    for (int i = 0; i < accumulatorFactories.size(); i++) {
        int index = i;
        Optional<Integer> overwriteIntermediateChannel = overwriteIntermediateChannelOffset.map(offset -> offset + index);
        builder.add(new Aggregator(accumulatorFactories.get(i), step, overwriteIntermediateChannel));
    }
    aggregators = builder.build();
}
/**
 * Does nothing; this implementation is intentionally empty.
 */
@Override
public void close()
{
    // intentionally a no-op
}
@Override
public Work> processPage(Page page)
{
if (aggregators.isEmpty()) {
return groupByHash.addPage(page);
}
else {
return new TransformWork<>(
groupByHash.getGroupIds(page),
groupByIdBlock -> {
for (Aggregator aggregator : aggregators) {
aggregator.processPage(groupByIdBlock, page);
}
// we do not need any output from TransformWork for this case
return null;
});
}
}
/**
 * Invokes the {@code UpdateMemory} callback supplied at construction time.
 */
@Override
public void updateMemory()
{
    this.updateMemory.update();
}
/**
 * Returns the current value of the {@code full} flag. The flag starts out false and is
 * only recomputed as a side effect of {@link #getSizeInMemory()}.
 */
@Override
public boolean isFull()
{
    return this.full;
}
/**
 * Reports the group-by hash's observed and expected collision counts to the given counter.
 *
 * @param hashCollisionsCounter receiver of the collision statistics
 */
@Override
public void recordHashCollisions(HashCollisionsCounter hashCollisionsCounter)
{
    long observedCollisions = groupByHash.getHashCollisions();
    double expectedCollisions = groupByHash.getExpectedHashCollisions();
    hashCollisionsCounter.recordHashCollision(observedCollisions, expectedCollisions);
}
/**
 * Returns the hash-collision count reported by the underlying group-by hash.
 */
public long getHashCollisions()
{
    return this.groupByHash.getHashCollisions();
}
/**
 * Returns the expected hash-collision count reported by the underlying group-by hash.
 */
public double getExpectedHashCollisions()
{
    return this.groupByHash.getExpectedHashCollisions();
}
@Override
public ListenableFuture> startMemoryRevoke()
{
throw new UnsupportedOperationException("startMemoryRevoke not supported for InMemoryHashAggregationBuilder");
}
/**
 * Not supported by this implementation.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void finishMemoryRevoke()
{
    String message = "finishMemoryRevoke not supported for InMemoryHashAggregationBuilder";
    throw new UnsupportedOperationException(message);
}
/**
 * Sums the estimated size of the group-by hash and of every aggregator.
 * <p>
 * Side effect: the computed total is passed to the memory-limit check, which may
 * change the value returned by {@link #isFull()}.
 *
 * @return estimated size of this builder's state
 */
public long getSizeInMemory()
{
    long total = groupByHash.getEstimatedSize();
    for (int i = 0; i < aggregators.size(); i++) {
        total += aggregators.get(i).getEstimatedSize();
    }
    updateIsFull(total);
    return total;
}
/**
 * Recomputes the {@code full} flag from the given size. The flag is left untouched
 * unless this is a partial aggregation with a configured memory limit.
 *
 * @param sizeInMemory current estimated size, compared against the limit
 */
private void updateIsFull(long sizeInMemory)
{
    boolean limitApplies = partial && maxPartialMemory.isPresent();
    if (limitApplies) {
        full = sizeInMemory > maxPartialMemory.getAsLong();
    }
}
/**
 * Building hash-sorted results requires memory for sorting group IDs.
 * This method returns the size of that memory requirement:
 * one {@code int} per group.
 */
public long getGroupIdsSortingSize()
{
    long groupCount = getGroupCount();
    return groupCount * Integer.BYTES;
}
/**
 * Switches every aggregator to partial output.
 */
public void setOutputPartial()
{
    aggregators.forEach(Aggregator::setOutputPartial);
}
/**
 * Returns the number of types exposed by the group-by hash.
 */
public int getKeyChannels()
{
    int keyChannelCount = groupByHash.getTypes().size();
    return keyChannelCount;
}
/**
 * Returns the number of groups currently held by the group-by hash.
 */
public long getGroupCount()
{
    return this.groupByHash.getGroupCount();
}
/**
 * Calls {@code prepareFinal()} on every aggregator and then produces the output pages,
 * visiting groups in ascending group-id order.
 *
 * @return processor yielding the result pages
 */
@Override
public WorkProcessor<Page> buildResult()
{
    for (Aggregator aggregator : aggregators) {
        aggregator.prepareFinal();
    }
    return buildResult(consecutiveGroupIds());
}
/**
 * Produces the output pages with groups ordered by their raw hash value.
 * Unlike {@link #buildResult()}, this does not call {@code prepareFinal()} on the aggregators.
 *
 * @return processor yielding the result pages
 */
public WorkProcessor<Page> buildHashSortedResult()
{
    return buildResult(hashSortedGroupIds());
}
/**
 * Returns the group-by hash types followed by the intermediate type of each aggregator.
 *
 * @return a new mutable list owned by the caller
 */
public List<Type> buildIntermediateTypes()
{
    List<Type> types = new ArrayList<>(groupByHash.getTypes());
    for (Aggregator aggregator : aggregators) {
        types.add(aggregator.getIntermediateType());
    }
    return types;
}
/**
 * Exposes the capacity reported by the group-by hash.
 */
@VisibleForTesting
public int getCapacity()
{
    return this.groupByHash.getCapacity();
}
/**
 * Produces output pages for the given group ids, in iteration order.
 * <p>
 * Each invocation of the returned processor resets the shared page builder and fills it
 * until it is full or the ids are exhausted. For every group the key values are appended
 * starting at channel 0, followed by one output channel per aggregator.
 *
 * @param groupIds ids of the groups to emit; consumed lazily by the returned processor
 * @return processor yielding the result pages
 */
private WorkProcessor<Page> buildResult(IntIterator groupIds)
{
    PageBuilder pageBuilder = new PageBuilder(buildTypes());
    return WorkProcessor.create(() -> {
        if (!groupIds.hasNext()) {
            return ProcessState.finished();
        }

        pageBuilder.reset();

        // aggregator outputs are placed right after the key channels
        int keyChannelCount = groupByHash.getTypes().size();
        while (!pageBuilder.isFull() && groupIds.hasNext()) {
            int groupId = groupIds.nextInt();

            groupByHash.appendValuesTo(groupId, pageBuilder, 0);

            pageBuilder.declarePosition();
            for (int i = 0; i < aggregators.size(); i++) {
                Aggregator aggregator = aggregators.get(i);
                BlockBuilder output = pageBuilder.getBlockBuilder(keyChannelCount + i);
                aggregator.evaluate(groupId, output);
            }
        }

        return ProcessState.ofResult(pageBuilder.build());
    });
}
/**
 * Returns the group-by hash types followed by the current output type of each aggregator
 * (intermediate or final, depending on the aggregator's step).
 *
 * @return a new mutable list owned by the caller
 */
public List<Type> buildTypes()
{
    List<Type> types = new ArrayList<>(groupByHash.getTypes());
    for (Aggregator aggregator : aggregators) {
        types.add(aggregator.getType());
    }
    return types;
}
/**
 * Returns an iterator over the group ids {@code 0 .. groupCount - 1} in ascending order.
 */
private IntIterator consecutiveGroupIds()
{
    int groupCount = groupByHash.getGroupCount();
    return IntIterators.fromTo(0, groupCount);
}
/**
 * Returns an iterator over all group ids, ordered by the raw hash of each group as
 * reported by the group-by hash.
 * <p>
 * The ids are materialized and sorted eagerly; the returned iterator only walks the
 * sorted array.
 */
private IntIterator hashSortedGroupIds()
{
    // Read once: nothing in this method changes the group count.
    int groupCount = groupByHash.getGroupCount();

    IntBigArray groupIds = new IntBigArray();
    groupIds.ensureCapacity(groupCount);
    for (int i = 0; i < groupCount; i++) {
        groupIds.set(i, i);
    }

    groupIds.sort(0, groupCount, (leftGroupId, rightGroupId) ->
            Long.compare(groupByHash.getRawHash(leftGroupId), groupByHash.getRawHash(rightGroupId)));

    return new AbstractIntIterator()
    {
        private int position;

        @Override
        public boolean hasNext()
        {
            return position < groupCount;
        }

        @Override
        public int nextInt()
        {
            return groupIds.get(position++);
        }
    };
}
/**
 * Pairs a grouped accumulator with the aggregation step that decides whether it consumes
 * raw or intermediate input and whether it emits intermediate or final output.
 */
private static class Aggregator
{
    private final GroupedAccumulator aggregation;
    // Not final: setOutputPartial() replaces it.
    private AggregationNode.Step step;
    // Page channel holding intermediate state; -1 when the step consumes raw input.
    private final int intermediateChannel;

    /**
     * @param accumulatorFactory factory for the grouped accumulator
     * @param step aggregation step; raw-input steps get a regular accumulator, all others
     *        an intermediate accumulator
     * @param overwriteIntermediateChannel when present (and input is not raw), the channel
     *        to read intermediate state from; otherwise the factory's single input channel is used
     * @throws IllegalArgumentException if input is intermediate, no override is given, and
     *         the factory does not declare exactly one input channel
     */
    private Aggregator(AccumulatorFactory accumulatorFactory, AggregationNode.Step step, Optional<Integer> overwriteIntermediateChannel)
    {
        if (step.isInputRaw()) {
            this.intermediateChannel = -1;
            this.aggregation = accumulatorFactory.createGroupedAccumulator();
        }
        else {
            if (overwriteIntermediateChannel.isPresent()) {
                this.intermediateChannel = overwriteIntermediateChannel.get();
            }
            else {
                checkArgument(accumulatorFactory.getInputChannels().size() == 1, "expected 1 input channel for intermediate aggregation");
                this.intermediateChannel = accumulatorFactory.getInputChannels().get(0);
            }
            this.aggregation = accumulatorFactory.createGroupedIntermediateAccumulator();
        }
        this.step = step;
    }

    /** Returns the accumulator's estimated size. */
    public long getEstimatedSize()
    {
        return aggregation.getEstimatedSize();
    }

    /** Returns the intermediate type when output is partial, the final type otherwise. */
    public Type getType()
    {
        if (step.isOutputPartial()) {
            return aggregation.getIntermediateType();
        }
        return aggregation.getFinalType();
    }

    /**
     * Feeds a page into the accumulator: the whole page for raw input, or only the
     * intermediate-state block otherwise.
     */
    public void processPage(GroupByIdBlock groupIds, Page page)
    {
        if (step.isInputRaw()) {
            aggregation.addInput(groupIds, page);
        }
        else {
            aggregation.addIntermediate(groupIds, page.getBlock(intermediateChannel));
        }
    }

    /** Delegates to the accumulator's {@code prepareFinal()}. */
    public void prepareFinal()
    {
        aggregation.prepareFinal();
    }

    /** Writes the group's intermediate or final value to {@code output}, depending on the step. */
    public void evaluate(int groupId, BlockBuilder output)
    {
        if (step.isOutputPartial()) {
            aggregation.evaluateIntermediate(groupId, output);
        }
        else {
            aggregation.evaluateFinal(groupId, output);
        }
    }

    /** Replaces the current step with {@code Step.partialOutput(step)}. */
    public void setOutputPartial()
    {
        step = AggregationNode.Step.partialOutput(step);
    }

    /** Returns the accumulator's intermediate type regardless of the current step. */
    public Type getIntermediateType()
    {
        return aggregation.getIntermediateType();
    }
}
public static List toTypes(List extends Type> groupByType, Step step, List factories, Optional hashChannel)
{
ImmutableList.Builder types = ImmutableList.builder();
types.addAll(groupByType);
if (hashChannel.isPresent()) {
types.add(BIGINT);
}
for (AccumulatorFactory factory : factories) {
types.add(new Aggregator(factory, step, Optional.empty()).getType());
}
return types.build();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy