
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.prestosql.operator;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import io.prestosql.array.LongBigArray;
import io.prestosql.spi.Page;
import io.prestosql.spi.PageBuilder;
import io.prestosql.spi.PrestoException;
import io.prestosql.spi.block.Block;
import io.prestosql.spi.block.BlockBuilder;
import io.prestosql.spi.block.DictionaryBlock;
import io.prestosql.spi.block.RunLengthEncodedBlock;
import io.prestosql.spi.type.Type;
import io.prestosql.sql.gen.JoinCompiler;
import io.prestosql.type.BlockTypeOperators;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import org.openjdk.jol.info.ClassLayout;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Verify.verify;
import static io.airlift.slice.SizeOf.sizeOf;
import static io.prestosql.operator.SyntheticAddress.decodePosition;
import static io.prestosql.operator.SyntheticAddress.decodeSliceIndex;
import static io.prestosql.operator.SyntheticAddress.encodeSyntheticAddress;
import static io.prestosql.spi.StandardErrorCode.GENERIC_INSUFFICIENT_RESOURCES;
import static io.prestosql.spi.type.BigintType.BIGINT;
import static io.prestosql.sql.gen.JoinCompiler.PagesHashStrategyFactory;
import static io.prestosql.util.HashCollisionsEstimator.estimateNumberOfHashCollisions;
import static it.unimi.dsi.fastutil.HashCommon.arraySize;
import static it.unimi.dsi.fastutil.HashCommon.murmurHash3;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;
// This implementation assumes arrays used in the hash are always a power of 2
public class MultiChannelGroupByHash
implements GroupByHash
{
private static final int INSTANCE_SIZE = ClassLayout.parseClass(MultiChannelGroupByHash.class).instanceSize();
private static final float FILL_RATIO = 0.75f;
private final List<Type> types;
private final List<Type> hashTypes;
private final int[] channels;
private final PagesHashStrategy hashStrategy;
private final List<ObjectArrayList<Block>> channelBuilders;
private final Optional<Integer> inputHashChannel;
private final HashGenerator hashGenerator;
private final OptionalInt precomputedHashChannel;
private final boolean processDictionary;
private PageBuilder currentPageBuilder;
private long completedPagesMemorySize;
private int hashCapacity;
private int maxFill;
private int mask;
private long[] groupAddressByHash;
private int[] groupIdsByHash;
private byte[] rawHashByHashPosition;
private final LongBigArray groupAddressByGroupId;
private int nextGroupId;
private DictionaryLookBack dictionaryLookBack;
private long hashCollisions;
private double expectedHashCollisions;
// reserve enough memory before rehash
private final UpdateMemory updateMemory;
private long preallocatedMemoryInBytes;
private long currentPageSizeInBytes;
public MultiChannelGroupByHash(
List<? extends Type> hashTypes,
int[] hashChannels,
Optional<Integer> inputHashChannel,
int expectedSize,
boolean processDictionary,
JoinCompiler joinCompiler,
BlockTypeOperators blockTypeOperators,
UpdateMemory updateMemory)
{
this.hashTypes = ImmutableList.copyOf(requireNonNull(hashTypes, "hashTypes is null"));
requireNonNull(joinCompiler, "joinCompiler is null");
requireNonNull(hashChannels, "hashChannels is null");
checkArgument(hashTypes.size() == hashChannels.length, "hashTypes and hashChannels have different sizes");
checkArgument(expectedSize > 0, "expectedSize must be greater than zero");
this.inputHashChannel = requireNonNull(inputHashChannel, "inputHashChannel is null");
this.types = inputHashChannel.isPresent() ? ImmutableList.copyOf(Iterables.concat(hashTypes, ImmutableList.of(BIGINT))) : this.hashTypes;
this.channels = hashChannels.clone();
this.hashGenerator = inputHashChannel.isPresent() ? new PrecomputedHashGenerator(inputHashChannel.get()) : new InterpretedHashGenerator(this.hashTypes, hashChannels, blockTypeOperators);
this.processDictionary = processDictionary;
// For each hashed channel, create an appendable list to hold the blocks (builders). As we
// add new values we append them to the existing block builder until it fills up and then
// we add a new block builder to each list.
ImmutableList.Builder<Integer> outputChannels = ImmutableList.builder();
ImmutableList.Builder<ObjectArrayList<Block>> channelBuilders = ImmutableList.builder();
for (int i = 0; i < hashChannels.length; i++) {
outputChannels.add(i);
channelBuilders.add(ObjectArrayList.wrap(new Block[1024], 0));
}
if (inputHashChannel.isPresent()) {
this.precomputedHashChannel = OptionalInt.of(hashChannels.length);
channelBuilders.add(ObjectArrayList.wrap(new Block[1024], 0));
}
else {
this.precomputedHashChannel = OptionalInt.empty();
}
this.channelBuilders = channelBuilders.build();
PagesHashStrategyFactory pagesHashStrategyFactory = joinCompiler.compilePagesHashStrategyFactory(this.types, outputChannels.build());
hashStrategy = pagesHashStrategyFactory.createPagesHashStrategy(this.channelBuilders, this.precomputedHashChannel);
startNewPage();
// reserve memory for the arrays
hashCapacity = arraySize(expectedSize, FILL_RATIO);
maxFill = calculateMaxFill(hashCapacity);
mask = hashCapacity - 1;
groupAddressByHash = new long[hashCapacity];
Arrays.fill(groupAddressByHash, -1);
rawHashByHashPosition = new byte[hashCapacity];
groupIdsByHash = new int[hashCapacity];
groupAddressByGroupId = new LongBigArray();
groupAddressByGroupId.ensureCapacity(maxFill);
// This interface is used for actively reserving memory (push model) for rehash.
// The caller can also query memory usage on this object (pull model)
this.updateMemory = requireNonNull(updateMemory, "updateMemory is null");
}
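// Recompute the raw hash for a group: decode its synthetic address into a (block index, position)
// pair and hash the stored row through the pages hash strategy.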
@Override
public long getRawHash(int groupId)
{
long address = groupAddressByGroupId.get(groupId);
int blockIndex = decodeSliceIndex(address);
int position = decodePosition(address);
return hashStrategy.hashPosition(blockIndex, position);
}
@Override
public long getEstimatedSize()
{
return INSTANCE_SIZE +
(sizeOf(channelBuilders.get(0).elements()) * channelBuilders.size()) +
completedPagesMemorySize +
currentPageBuilder.getRetainedSizeInBytes() +
sizeOf(groupAddressByHash) +
sizeOf(groupIdsByHash) +
groupAddressByGroupId.sizeOf() +
sizeOf(rawHashByHashPosition) +
preallocatedMemoryInBytes;
}
@Override
public long getHashCollisions()
{
return hashCollisions;
}
@Override
public double getExpectedHashCollisions()
{
return expectedHashCollisions + estimateNumberOfHashCollisions(getGroupCount(), hashCapacity);
}
@Override
public List<Type> getTypes()
{
return types;
}
@Override
public int getGroupCount()
{
return nextGroupId;
}
@Override
public void appendValuesTo(int groupId, PageBuilder pageBuilder, int outputChannelOffset)
{
long address = groupAddressByGroupId.get(groupId);
int blockIndex = decodeSliceIndex(address);
int position = decodePosition(address);
hashStrategy.appendTo(blockIndex, position, pageBuilder, outputChannelOffset);
}
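// Returns a Work item that inserts every row of the page into the hash table. Specialized
// implementations handle run-length-encoded and dictionary-encoded pages; process() can be
// resumed later if a rehash cannot reserve enough memory.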
@Override
public Work<?> addPage(Page page)
{
currentPageSizeInBytes = page.getRetainedSizeInBytes();
if (isRunLengthEncoded(page)) {
return new AddRunLengthEncodedPageWork(page);
}
if (canProcessDictionary(page)) {
return new AddDictionaryPageWork(page);
}
return new AddNonDictionaryPageWork(page);
}
@Override
public Work<GroupByIdBlock> getGroupIds(Page page)
{
currentPageSizeInBytes = page.getRetainedSizeInBytes();
if (isRunLengthEncoded(page)) {
return new GetRunLengthEncodedGroupIdsWork(page);
}
if (canProcessDictionary(page)) {
return new GetDictionaryGroupIdsWork(page);
}
return new GetNonDictionaryGroupIdsWork(page);
}
@Override
public boolean contains(int position, Page page, int[] hashChannels)
{
long rawHash = hashStrategy.hashRow(position, page);
return contains(position, page, hashChannels, rawHash);
}
@Override
public boolean contains(int position, Page page, int[] hashChannels, long rawHash)
{
int hashPosition = (int) getHashPosition(rawHash, mask);
// look for a slot containing this key
while (groupAddressByHash[hashPosition] != -1) {
if (positionNotDistinctFromCurrentRow(groupAddressByHash[hashPosition], hashPosition, position, page, (byte) rawHash, hashChannels)) {
// found an existing slot for this key
return true;
}
// increment position and mask to handle wrap around
hashPosition = (hashPosition + 1) & mask;
}
return false;
}
@VisibleForTesting
@Override
public int getCapacity()
{
return hashCapacity;
}
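// Open-addressing lookup: probe linearly from the hash position, comparing the stored one-byte
// raw hash before the full row, and add a new group if the key is not found.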
private int putIfAbsent(int position, Page page)
{
long rawHash = hashGenerator.hashPosition(position, page);
return putIfAbsent(position, page, rawHash);
}
private int putIfAbsent(int position, Page page, long rawHash)
{
int hashPosition = (int) getHashPosition(rawHash, mask);
// look for an empty slot or a slot containing this key
int groupId = -1;
while (groupAddressByHash[hashPosition] != -1) {
if (positionNotDistinctFromCurrentRow(groupAddressByHash[hashPosition], hashPosition, position, page, (byte) rawHash, channels)) {
// found an existing slot for this key
groupId = groupIdsByHash[hashPosition];
break;
}
// increment position and mask to handle wrap around
hashPosition = (hashPosition + 1) & mask;
hashCollisions++;
}
// did we find an existing group?
if (groupId < 0) {
groupId = addNewGroup(hashPosition, position, page, rawHash);
}
return groupId;
}
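// Append the key columns (and the precomputed hash, if any) of the new group to the current page
// builder, record its synthetic address, and assign the next group id. Starts a new page builder
// when the current one is full and triggers a rehash once maxFill is reached.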
private int addNewGroup(int hashPosition, int position, Page page, long rawHash)
{
// add the row to the open page
for (int i = 0; i < channels.length; i++) {
int hashChannel = channels[i];
Type type = types.get(i);
type.appendTo(page.getBlock(hashChannel), position, currentPageBuilder.getBlockBuilder(i));
}
if (precomputedHashChannel.isPresent()) {
BIGINT.writeLong(currentPageBuilder.getBlockBuilder(precomputedHashChannel.getAsInt()), rawHash);
}
currentPageBuilder.declarePosition();
int pageIndex = channelBuilders.get(0).size() - 1;
int pagePosition = currentPageBuilder.getPositionCount() - 1;
long address = encodeSyntheticAddress(pageIndex, pagePosition);
// record group id in hash
int groupId = nextGroupId++;
groupAddressByHash[hashPosition] = address;
rawHashByHashPosition[hashPosition] = (byte) rawHash;
groupIdsByHash[hashPosition] = groupId;
groupAddressByGroupId.set(groupId, address);
// create new page builder if this page is full
if (currentPageBuilder.isFull()) {
startNewPage();
}
// increase capacity, if necessary
if (needRehash()) {
tryRehash();
}
return groupId;
}
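// A rehash is required once the number of groups reaches maxFill (FILL_RATIO of the capacity).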
private boolean needRehash()
{
return nextGroupId >= maxFill;
}
private void startNewPage()
{
if (currentPageBuilder != null) {
completedPagesMemorySize += currentPageBuilder.getRetainedSizeInBytes();
currentPageBuilder = currentPageBuilder.newPageBuilderLike();
}
else {
currentPageBuilder = new PageBuilder(types);
}
for (int i = 0; i < types.size(); i++) {
channelBuilders.get(i).add(currentPageBuilder.getBlockBuilder(i));
}
}
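// Double the table capacity. Memory for the larger arrays is reserved up front through
// updateMemory (push model); if the reservation is rejected this returns false so the caller can
// yield. Otherwise every existing group address and id is reinserted under the new mask.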
private boolean tryRehash()
{
long newCapacityLong = hashCapacity * 2L;
if (newCapacityLong > Integer.MAX_VALUE) {
throw new PrestoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed 1 billion entries");
}
int newCapacity = toIntExact(newCapacityLong);
// An estimate of how much extra memory is needed before we can go ahead and expand the hash table.
// This includes the new capacity for groupAddressByHash, rawHashByHashPosition, groupIdsByHash, and groupAddressByGroupId as well as the size of the current page
preallocatedMemoryInBytes = (newCapacity - hashCapacity) * (long) (Long.BYTES + Integer.BYTES + Byte.BYTES) +
(calculateMaxFill(newCapacity) - maxFill) * Long.BYTES +
currentPageSizeInBytes;
if (!updateMemory.update()) {
// memory was reserved but the limit was exceeded; yield until more memory becomes available
return false;
}
preallocatedMemoryInBytes = 0;
expectedHashCollisions += estimateNumberOfHashCollisions(getGroupCount(), hashCapacity);
int newMask = newCapacity - 1;
long[] newKey = new long[newCapacity];
byte[] rawHashes = new byte[newCapacity];
Arrays.fill(newKey, -1);
int[] newValue = new int[newCapacity];
int oldIndex = 0;
for (int groupId = 0; groupId < nextGroupId; groupId++) {
// seek to the next used slot
while (groupAddressByHash[oldIndex] == -1) {
oldIndex++;
}
// get the address for this slot
long address = groupAddressByHash[oldIndex];
long rawHash = hashPosition(address);
// find an empty slot for the address
int pos = (int) getHashPosition(rawHash, newMask);
while (newKey[pos] != -1) {
pos = (pos + 1) & newMask;
hashCollisions++;
}
// record the mapping
newKey[pos] = address;
rawHashes[pos] = (byte) rawHash;
newValue[pos] = groupIdsByHash[oldIndex];
oldIndex++;
}
this.mask = newMask;
this.hashCapacity = newCapacity;
this.maxFill = calculateMaxFill(newCapacity);
this.groupAddressByHash = newKey;
this.rawHashByHashPosition = rawHashes;
this.groupIdsByHash = newValue;
groupAddressByGroupId.ensureCapacity(maxFill);
return true;
}
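// Hash of the row stored at the given synthetic address: read the precomputed hash channel when
// present, otherwise recompute it from the stored key columns.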
private long hashPosition(long sliceAddress)
{
int sliceIndex = decodeSliceIndex(sliceAddress);
int position = decodePosition(sliceAddress);
if (precomputedHashChannel.isPresent()) {
return getRawHash(sliceIndex, position);
}
return hashStrategy.hashPosition(sliceIndex, position);
}
private long getRawHash(int sliceIndex, int position)
{
return channelBuilders.get(precomputedHashChannel.getAsInt()).get(sliceIndex).getLong(position, 0);
}
private boolean positionNotDistinctFromCurrentRow(long address, int hashPosition, int position, Page page, byte rawHash, int[] hashChannels)
{
if (rawHashByHashPosition[hashPosition] != rawHash) {
return false;
}
return hashStrategy.positionNotDistinctFromRow(decodeSliceIndex(address), decodePosition(address), position, page, hashChannels);
}
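// Spread the raw hash with the murmur3 finalizer, then mask it into the power-of-two table.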
private static long getHashPosition(long rawHash, int mask)
{
return murmurHash3(rawHash) & mask;
}
private static int calculateMaxFill(int hashSize)
{
checkArgument(hashSize > 0, "hashSize must be greater than 0");
int maxFill = (int) Math.ceil(hashSize * FILL_RATIO);
if (maxFill == hashSize) {
maxFill--;
}
checkArgument(hashSize > maxFill, "hashSize must be larger than maxFill");
return maxFill;
}
private void updateDictionaryLookBack(Block dictionary)
{
if (dictionaryLookBack == null || dictionaryLookBack.getDictionary() != dictionary) {
dictionaryLookBack = new DictionaryLookBack(dictionary);
}
}
// For a page that contains DictionaryBlocks, create a new page in which
// the dictionaries from the DictionaryBlocks are extracted into the corresponding channels
// From Page(DictionaryBlock1, DictionaryBlock2) create new page with Page(dictionary1, dictionary2)
private Page createPageWithExtractedDictionary(Page page)
{
Block[] blocks = new Block[page.getChannelCount()];
Block dictionary = ((DictionaryBlock) page.getBlock(channels[0])).getDictionary();
// extract data dictionary
blocks[channels[0]] = dictionary;
// extract hash dictionary
if (inputHashChannel.isPresent()) {
blocks[inputHashChannel.get()] = ((DictionaryBlock) page.getBlock(inputHashChannel.get())).getDictionary();
}
return new Page(dictionary.getPositionCount(), blocks);
}
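// The dictionary fast path applies only when dictionary processing is enabled, there is a single
// grouping channel, that channel is dictionary encoded, and any precomputed hash channel is a
// dictionary sharing the same dictionary source id.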
private boolean canProcessDictionary(Page page)
{
if (!this.processDictionary || channels.length > 1 || !(page.getBlock(channels[0]) instanceof DictionaryBlock)) {
return false;
}
if (inputHashChannel.isPresent()) {
Block inputHashBlock = page.getBlock(inputHashChannel.get());
DictionaryBlock inputDataBlock = (DictionaryBlock) page.getBlock(channels[0]);
if (!(inputHashBlock instanceof DictionaryBlock)) {
// data channel is dictionary encoded but hash channel is not
return false;
}
if (!((DictionaryBlock) inputHashBlock).getDictionarySourceId().equals(inputDataBlock.getDictionarySourceId())) {
// dictionarySourceIds of data block and hash block do not match
return false;
}
}
return true;
}
private boolean isRunLengthEncoded(Page page)
{
for (int i = 0; i < channels.length; i++) {
if (!(page.getBlock(channels[i]) instanceof RunLengthEncodedBlock)) {
return false;
}
}
return true;
}
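// Resolve the group id for a dictionary position, memoizing the result in the DictionaryLookBack
// so each distinct dictionary entry is hashed at most once.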
private int getGroupId(HashGenerator hashGenerator, Page page, int positionInDictionary)
{
if (dictionaryLookBack.isProcessed(positionInDictionary)) {
return dictionaryLookBack.getGroupId(positionInDictionary);
}
int groupId = putIfAbsent(positionInDictionary, page, hashGenerator.hashPosition(positionInDictionary, page));
dictionaryLookBack.setProcessed(positionInDictionary, groupId);
return groupId;
}
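// Caches the group id assigned to each position of a dictionary, so repeated references to the
// same dictionary entry skip the hash table probe.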
private static final class DictionaryLookBack
{
private final Block dictionary;
private final int[] processed;
public DictionaryLookBack(Block dictionary)
{
this.dictionary = dictionary;
this.processed = new int[dictionary.getPositionCount()];
Arrays.fill(processed, -1);
}
public Block getDictionary()
{
return dictionary;
}
public int getGroupId(int position)
{
return processed[position];
}
public boolean isProcessed(int position)
{
return processed[position] != -1;
}
public void setProcessed(int position, int groupId)
{
processed[position] = groupId;
}
}
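// The Work implementations below process a page incrementally: process() returns false when a
// rehash cannot reserve memory, and the caller re-invokes it once memory becomes available.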
private class AddNonDictionaryPageWork
implements Work<Void>
{
private final Page page;
private int lastPosition;
public AddNonDictionaryPageWork(Page page)
{
this.page = requireNonNull(page, "page is null");
}
@Override
public boolean process()
{
int positionCount = page.getPositionCount();
checkState(lastPosition < positionCount, "position count out of bound");
// needRehash() == true indicates we have reached the capacity boundary and a rehash is needed.
// We can only proceed if tryRehash() successfully did a rehash.
if (needRehash() && !tryRehash()) {
return false;
}
// putIfAbsent will rehash automatically if rehash is needed, unless there isn't enough memory to do so.
// Therefore needRehash will not generally return true even if we have just crossed the capacity boundary.
while (lastPosition < positionCount && !needRehash()) {
// get the group for the current row
putIfAbsent(lastPosition, page);
lastPosition++;
}
return lastPosition == positionCount;
}
@Override
public Void getResult()
{
throw new UnsupportedOperationException();
}
}
private class AddDictionaryPageWork
implements Work<Void>
{
private final Page page;
private final Page dictionaryPage;
private final DictionaryBlock dictionaryBlock;
private int lastPosition;
public AddDictionaryPageWork(Page page)
{
verify(canProcessDictionary(page), "invalid call to addDictionaryPage");
this.page = requireNonNull(page, "page is null");
this.dictionaryBlock = (DictionaryBlock) page.getBlock(channels[0]);
updateDictionaryLookBack(dictionaryBlock.getDictionary());
this.dictionaryPage = createPageWithExtractedDictionary(page);
}
@Override
public boolean process()
{
int positionCount = page.getPositionCount();
checkState(lastPosition < positionCount, "position count out of bound");
// needRehash() == true indicates we have reached the capacity boundary and a rehash is needed.
// We can only proceed if tryRehash() successfully did a rehash.
if (needRehash() && !tryRehash()) {
return false;
}
// putIfAbsent will rehash automatically if rehash is needed, unless there isn't enough memory to do so.
// Therefore needRehash will not generally return true even if we have just crossed the capacity boundary.
while (lastPosition < positionCount && !needRehash()) {
int positionInDictionary = dictionaryBlock.getId(lastPosition);
getGroupId(hashGenerator, dictionaryPage, positionInDictionary);
lastPosition++;
}
return lastPosition == positionCount;
}
@Override
public Void getResult()
{
throw new UnsupportedOperationException();
}
}
private class AddRunLengthEncodedPageWork
implements Work<Void>
{
private final Page page;
private boolean finished;
public AddRunLengthEncodedPageWork(Page page)
{
this.page = requireNonNull(page, "page is null");
}
@Override
public boolean process()
{
checkState(!finished);
if (page.getPositionCount() == 0) {
finished = true;
return true;
}
// needRehash() == true indicates we have reached the capacity boundary and a rehash is needed.
// We can only proceed if tryRehash() successfully did a rehash.
if (needRehash() && !tryRehash()) {
return false;
}
// Only the first row needs to be processed since the page is run-length encoded
putIfAbsent(0, page);
finished = true;
return true;
}
@Override
public Void getResult()
{
throw new UnsupportedOperationException();
}
}
private class GetNonDictionaryGroupIdsWork
implements Work<GroupByIdBlock>
{
private final BlockBuilder blockBuilder;
private final Page page;
private boolean finished;
private int lastPosition;
public GetNonDictionaryGroupIdsWork(Page page)
{
this.page = requireNonNull(page, "page is null");
// we know the exact size required for the block
this.blockBuilder = BIGINT.createFixedSizeBlockBuilder(page.getPositionCount());
}
@Override
public boolean process()
{
int positionCount = page.getPositionCount();
checkState(lastPosition <= positionCount, "position count out of bound");
checkState(!finished);
// needRehash() == true indicates we have reached the capacity boundary and a rehash is needed.
// We can only proceed if tryRehash() successfully did a rehash.
if (needRehash() && !tryRehash()) {
return false;
}
// putIfAbsent will rehash automatically if rehash is needed, unless there isn't enough memory to do so.
// Therefore needRehash will not generally return true even if we have just crossed the capacity boundary.
while (lastPosition < positionCount && !needRehash()) {
// output the group id for this row
BIGINT.writeLong(blockBuilder, putIfAbsent(lastPosition, page));
lastPosition++;
}
return lastPosition == positionCount;
}
@Override
public GroupByIdBlock getResult()
{
checkState(lastPosition == page.getPositionCount(), "process has not yet finished");
checkState(!finished, "result has already been produced");
finished = true;
return new GroupByIdBlock(nextGroupId, blockBuilder.build());
}
}
private class GetDictionaryGroupIdsWork
implements Work<GroupByIdBlock>
{
private final BlockBuilder blockBuilder;
private final Page page;
private final Page dictionaryPage;
private final DictionaryBlock dictionaryBlock;
private boolean finished;
private int lastPosition;
public GetDictionaryGroupIdsWork(Page page)
{
this.page = requireNonNull(page, "page is null");
verify(canProcessDictionary(page), "invalid call to processDictionary");
this.dictionaryBlock = (DictionaryBlock) page.getBlock(channels[0]);
updateDictionaryLookBack(dictionaryBlock.getDictionary());
this.dictionaryPage = createPageWithExtractedDictionary(page);
// we know the exact size required for the block
this.blockBuilder = BIGINT.createFixedSizeBlockBuilder(page.getPositionCount());
}
@Override
public boolean process()
{
int positionCount = page.getPositionCount();
checkState(lastPosition < positionCount, "position count out of bound");
checkState(!finished);
// needRehash() == true indicates we have reached the capacity boundary and a rehash is needed.
// We can only proceed if tryRehash() successfully did a rehash.
if (needRehash() && !tryRehash()) {
return false;
}
// putIfAbsent will rehash automatically if rehash is needed, unless there isn't enough memory to do so.
// Therefore needRehash will not generally return true even if we have just crossed the capacity boundary.
while (lastPosition < positionCount && !needRehash()) {
int positionInDictionary = dictionaryBlock.getId(lastPosition);
int groupId = getGroupId(hashGenerator, dictionaryPage, positionInDictionary);
BIGINT.writeLong(blockBuilder, groupId);
lastPosition++;
}
return lastPosition == positionCount;
}
@Override
public GroupByIdBlock getResult()
{
checkState(lastPosition == page.getPositionCount(), "process has not yet finished");
checkState(!finished, "result has already been produced");
finished = true;
return new GroupByIdBlock(nextGroupId, blockBuilder.build());
}
}
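// All positions of a run-length-encoded page share one value, so a single group id is computed
// and the result is returned as a run-length-encoded block.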
private class GetRunLengthEncodedGroupIdsWork
implements Work<GroupByIdBlock>
{
private final Page page;
int groupId = -1;
private boolean processFinished;
private boolean resultProduced;
public GetRunLengthEncodedGroupIdsWork(Page page)
{
this.page = requireNonNull(page, "page is null");
}
@Override
public boolean process()
{
checkState(!processFinished);
if (page.getPositionCount() == 0) {
processFinished = true;
return true;
}
// needRehash() == true indicates we have reached the capacity boundary and a rehash is needed.
// We can only proceed if tryRehash() successfully did a rehash.
if (needRehash() && !tryRehash()) {
return false;
}
// Only the first row needs to be processed since the page is run-length encoded
groupId = putIfAbsent(0, page);
processFinished = true;
return true;
}
@Override
public GroupByIdBlock getResult()
{
checkState(processFinished);
checkState(!resultProduced);
resultProduced = true;
return new GroupByIdBlock(
nextGroupId,
new RunLengthEncodedBlock(
BIGINT.createFixedSizeBlockBuilder(1).writeLong(groupId).build(),
page.getPositionCount()));
}
}
}