io.trino.spi.block.DictionaryBlock Maven / Gradle / Ivy
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.spi.block;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.OptionalInt;
import java.util.function.ObjLongConsumer;
import static io.airlift.slice.SizeOf.instanceSize;
import static io.airlift.slice.SizeOf.sizeOf;
import static io.trino.spi.block.BlockUtil.checkArrayRange;
import static io.trino.spi.block.BlockUtil.checkValidPosition;
import static io.trino.spi.block.BlockUtil.checkValidPositions;
import static io.trino.spi.block.BlockUtil.checkValidRegion;
import static io.trino.spi.block.BlockUtil.compactArray;
import static io.trino.spi.block.DictionaryId.randomDictionaryId;
import static java.lang.Math.min;
import static java.util.Objects.requireNonNull;
public final class DictionaryBlock
implements Block
{
// Shallow footprint of one DictionaryBlock instance plus one DictionaryId instance.
// The ids array is added in the constructor and the dictionary in getRetainedSizeInBytes().
private static final int INSTANCE_SIZE = instanceSize(DictionaryBlock.class) + instanceSize(DictionaryId.class);
// NOTE(review): not referenced in the visible part of this file; presumably a sentinel meaning
// "no null position found" - confirm against its usages.
private static final int NULL_NOT_FOUND = -1;
// Number of positions exposed by this block
private final int positionCount;
// Values that the entries of ids index into
private final ValueBlock dictionary;
// Index in ids of the id belonging to position 0 of this block
private final int idsOffset;
// Dictionary indexes; position i of this block reads ids[idsOffset + i]
private final int[] ids;
// INSTANCE_SIZE plus the size of the ids array; excludes the dictionary itself
private final long retainedSizeInBytes;
// Cached result for getSizeInBytes(); -1 means not computed yet
private volatile long sizeInBytes = -1;
// Cached count of distinct ids referenced by this block; -1 means not computed yet
private volatile int uniqueIds = -1;
// isSequentialIds is only valid when uniqueIds is computed
private volatile boolean isSequentialIds;
// Dictionary source id handed to the constructor
private final DictionaryId dictionarySourceId;
// Decided once at construction: true when the block is non-empty and the dictionary is either
// not loaded or reports that it may contain nulls
private final boolean mayHaveNull;
/**
 * Builds a block of {@code positionCount} positions whose values are looked up in {@code dictionary}
 * through {@code ids}, reading ids from index 0 and using a newly generated dictionary source id.
 * <p>
 * The work is delegated to {@code createInternal}, so the returned block is not necessarily a
 * {@code DictionaryBlock}.
 *
 * @param positionCount number of positions in the resulting block
 * @param dictionary block holding the values that ids refer to
 * @param ids dictionary indexes, one per position
 * @return the block produced by {@code createInternal}
 */
public static Block create(int positionCount, Block dictionary, int[] ids)
{
    DictionaryId freshSourceId = randomDictionaryId();
    return createInternal(0, positionCount, dictionary, ids, freshSourceId);
}
/**
* This should only be used when creating a projection of another dictionary block.
*/
public static Block createProjectedDictionaryBlock(int positionCount, Block dictionary, int[] ids, DictionaryId dictionarySourceId)
{
    // Ids are read from the start of the array; the caller-supplied source id is passed through unchanged
    final int firstIdIndex = 0;
    return createInternal(firstIdIndex, positionCount, dictionary, ids, dictionarySourceId);
}
/**
 * Chooses the block representation for {@code positionCount} ids over {@code dictionary}.
 * <ul>
 * <li>zero positions: an empty copy of the dictionary ({@code copyRegion(0, 0)})</li>
 * <li>one position: the single-position region of the dictionary selected by the first id</li>
 * <li>run-length encoded dictionary: a new run-length encoded block of the same value</li>
 * <li>{@code ValueBlock} dictionary: a {@code DictionaryBlock} sharing {@code ids} and using {@code dictionarySourceId}</li>
 * <li>any other dictionary: ids are resolved to positions of the underlying value block, and the resulting
 * {@code DictionaryBlock} gets a new random source id instead of {@code dictionarySourceId}</li>
 * </ul>
 *
 * @param idsOffset index in {@code ids} of the id for position 0
 * @param positionCount number of positions in the resulting block
 * @param dictionary block holding the values that ids refer to
 * @param ids dictionary indexes
 * @param dictionarySourceId source id used when {@code dictionary} is wrapped directly
 */
static Block createInternal(int idsOffset, int positionCount, Block dictionary, int[] ids, DictionaryId dictionarySourceId)
{
    // Empty and single-position results never need an id indirection
    switch (positionCount) {
        case 0 -> {
            return dictionary.copyRegion(0, 0);
        }
        case 1 -> {
            return dictionary.getRegion(ids[idsOffset], 1);
        }
        default -> {
            // continue with the general cases below
        }
    }

    // every id of an RLE dictionary selects the same value, so the result is again an RLE
    if (dictionary instanceof RunLengthEncodedBlock rle) {
        return RunLengthEncodedBlock.create(rle.getValue(), positionCount);
    }

    if (!(dictionary instanceof ValueBlock valueBlock)) {
        // dictionary wraps another block: translate each id into a position of the underlying value block
        int[] resolvedIds = new int[positionCount];
        int source = idsOffset;
        for (int target = 0; target < positionCount; target++) {
            resolvedIds[target] = dictionary.getUnderlyingValuePosition(ids[source]);
            source++;
        }
        return new DictionaryBlock(0, positionCount, dictionary.getUnderlyingValueBlock(), resolvedIds, false, false, randomDictionaryId());
    }

    return new DictionaryBlock(idsOffset, positionCount, valueBlock, ids, false, false, dictionarySourceId);
}
/**
 * Creates a dictionary block over a value dictionary.
 *
 * @param idsOffset index in {@code ids} of the id for position 0; must not be negative
 * @param positionCount number of positions in this block; must not be negative
 * @param dictionary values that the ids index into
 * @param ids dictionary indexes; must hold at least {@code positionCount} entries starting at {@code idsOffset}
 * @param dictionaryIsCompacted when true, the size in bytes and the unique id count are initialized eagerly
 * from the dictionary instead of being computed on demand
 * @param isSequentialIds initial value of the sequential ids flag; only accepted together with
 * {@code dictionaryIsCompacted}
 * @param dictionarySourceId source id recorded for this block
 * @throws NullPointerException if {@code dictionary}, {@code ids} or {@code dictionarySourceId} is null
 * @throws IllegalArgumentException if {@code positionCount} or {@code idsOffset} is negative, if {@code ids} is
 * too short, or if {@code isSequentialIds} is set without {@code dictionaryIsCompacted}
 */
private DictionaryBlock(int idsOffset, int positionCount, ValueBlock dictionary, int[] ids, boolean dictionaryIsCompacted, boolean isSequentialIds, DictionaryId dictionarySourceId)
{
    requireNonNull(dictionary, "dictionary is null");
    requireNonNull(ids, "ids is null");

    if (positionCount < 0) {
        throw new IllegalArgumentException("positionCount is negative");
    }
    // Fail fast: a negative offset would otherwise slip through the length check below (or overflow it)
    // and only surface later as an out-of-bounds access on ids
    if (idsOffset < 0) {
        throw new IllegalArgumentException("idsOffset is negative");
    }
    if (ids.length - idsOffset < positionCount) {
        throw new IllegalArgumentException("ids length is less than positionCount");
    }

    this.idsOffset = idsOffset;
    this.positionCount = positionCount;
    this.dictionary = dictionary;
    this.ids = ids;
    this.dictionarySourceId = requireNonNull(dictionarySourceId, "dictionarySourceId is null");
    // the dictionary is not included here; it is added on demand in getRetainedSizeInBytes()
    this.retainedSizeInBytes = INSTANCE_SIZE + sizeOf(ids);

    // avoid eager loading of lazy dictionaries
    this.mayHaveNull = positionCount > 0 && (!dictionary.isLoaded() || dictionary.mayHaveNull());

    if (dictionaryIsCompacted) {
        // a compacted dictionary is taken to be fully referenced, so both cached values are known up front
        this.sizeInBytes = dictionary.getSizeInBytes() + (Integer.BYTES * (long) positionCount);
        this.uniqueIds = dictionary.getPositionCount();
    }

    if (isSequentialIds && !dictionaryIsCompacted) {
        throw new IllegalArgumentException("sequential ids flag is only valid for compacted dictionary");
    }
    this.isSequentialIds = isSequentialIds;
}
/**
 * Returns the backing ids array itself, not a copy. Entries belonging to this block start at
 * {@code getRawIdsOffset()}.
 */
public int[] getRawIds()
{
    return this.ids;
}
/**
 * Returns the index in the raw ids array that holds the id for position 0 of this block.
 */
public int getRawIdsOffset()
{
    return this.idsOffset;
}
/**
 * Returns the dictionary's single value block for the dictionary entry that {@code position} refers to.
 */
@Override
public ValueBlock getSingleValueBlock(int position)
{
    int dictionaryPosition = getId(position);
    return dictionary.getSingleValueBlock(dictionaryPosition);
}
/**
 * Returns the number of positions in this block.
 */
@Override
public int getPositionCount()
{
    return this.positionCount;
}
/**
 * Reports a constant per-position size when one is known.
 * <p>
 * A value is only returned when the cached unique id count equals the position count, i.e. no id repeats.
 * In that case the result is the dictionary's own fixed size per position plus {@code Integer.BYTES} for the id;
 * if the dictionary has no fixed size, its empty result is returned unchanged. While the unique id count has
 * not been computed, the result is empty.
 */
@Override
public OptionalInt fixedSizeInBytesPerPosition()
{
    if (uniqueIds != positionCount) {
        // ids repeat (or the unique count is not known yet), so no constant size can be stated
        return OptionalInt.empty();
    }

    OptionalInt dictionaryEntrySize = dictionary.fixedSizeInBytesPerPosition();
    if (dictionaryEntrySize.isEmpty()) {
        return dictionaryEntrySize;
    }
    // each position costs one dictionary entry plus one int in the ids array
    return OptionalInt.of(dictionaryEntrySize.getAsInt() + Integer.BYTES);
}
/**
 * Returns the cached size in bytes, computing it through {@code calculateCompactSize()} on first use
 * (the cache holds -1 until then).
 */
@Override
public long getSizeInBytes()
{
    boolean notComputedYet = sizeInBytes == -1;
    if (notComputedYet) {
        calculateCompactSize();
    }
    return sizeInBytes;
}
/**
 * Scans all ids of this block once and fills the cached {@code sizeInBytes}, {@code uniqueIds} and
 * {@code isSequentialIds} fields. Ids count as sequential when each one is strictly greater than the
 * one before it.
 */
private void calculateCompactSize()
{
    boolean[] referenced = new boolean[dictionary.getPositionCount()];
    int distinctIds = 0;
    boolean strictlyIncreasing = true;
    int lastId = -1;

    int cursor = idsOffset;
    for (int remaining = positionCount; remaining > 0; remaining--) {
        int id = ids[cursor];
        cursor++;

        // branch-free counting: add one only the first time an id is seen
        distinctIds += referenced[id] ? 0 : 1;
        referenced[id] = true;

        // once the order is broken this stays false, so the branch flips at most once
        if (strictlyIncreasing) {
            strictlyIncreasing = lastId < id;
            lastId = id;
        }
    }

    this.sizeInBytes = getSizeInBytesForSelectedPositions(referenced, distinctIds, positionCount);
    this.uniqueIds = distinctIds;
    this.isSequentialIds = strictlyIncreasing;
}
/**
 * Returns the size in bytes of the region of {@code length} positions starting at {@code positionOffset}.
 * <p>
 * A region covering the whole block is answered from the cached {@code getSizeInBytes()}. When a fixed
 * per-position size is available the result is that size times {@code length}. Otherwise the ids of the region
 * are scanned to find which dictionary entries are referenced.
 */
@Override
public long getRegionSizeInBytes(int positionOffset, int length)
{
    boolean coversWholeBlock = positionOffset == 0 && length == getPositionCount();
    if (coversWholeBlock) {
        // scanning ids is expensive, while the whole-block size is cached
        return getSizeInBytes();
    }

    OptionalInt perPositionSize = fixedSizeInBytesPerPosition();
    if (perPositionSize.isPresent()) {
        // ids do not repeat and every dictionary entry has the same size
        return (long) length * perPositionSize.getAsInt();
    }

    boolean[] referenced = new boolean[dictionary.getPositionCount()];
    int distinctIds = 0;
    int cursor = idsOffset + positionOffset;
    for (int remaining = length; remaining > 0; remaining--) {
        int id = ids[cursor];
        cursor++;
        distinctIds += referenced[id] ? 0 : 1;
        referenced[id] = true;
    }
    return getSizeInBytesForSelectedPositions(referenced, distinctIds, length);
}
/**
 * Returns the size in bytes of the positions flagged in {@code positions}.
 * <p>
 * {@code positions} is first validated against the position count. Zero selected positions yield 0, a full
 * selection is answered from the cached {@code getSizeInBytes()}, and a fixed per-position size (when
 * available) is multiplied by {@code selectedPositionsCount}. Otherwise the ids of the selected positions are
 * scanned to find which dictionary entries are referenced.
 *
 * @param positions selection flags, indexed by position
 * @param selectedPositionsCount number of selected positions, as supplied by the caller
 */
@Override
public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionsCount)
{
    checkValidPositions(positions, positionCount);

    if (selectedPositionsCount == 0) {
        return 0;
    }
    if (selectedPositionsCount == positionCount) {
        return getSizeInBytes();
    }

    OptionalInt perPositionSize = fixedSizeInBytesPerPosition();
    if (perPositionSize.isPresent()) {
        // ids do not repeat and the dictionary block has a fixed size per position
        return (long) selectedPositionsCount * perPositionSize.getAsInt();
    }

    boolean[] referenced = new boolean[dictionary.getPositionCount()];
    int distinctIds = 0;
    for (int position = 0; position < positions.length; position++) {
        int id = ids[idsOffset + position];
        if (!positions[position]) {
            continue;
        }
        distinctIds += referenced[id] ? 0 : 1;
        referenced[id] = true;
    }
    return getSizeInBytesForSelectedPositions(referenced, distinctIds, selectedPositionsCount);
}
/**
 * Combines the dictionary's size for the referenced entries with the id overhead of the selected positions.
 * <p>
 * Side effect: when every dictionary entry is referenced and no size has been cached yet, the unique id count
 * and the whole-block size (based on this block's full position count) are cached.
 *
 * @param usedIds flags marking the referenced dictionary entries
 * @param uniqueIds number of flags set in {@code usedIds}
 * @param selectedPositions number of positions whose ids are accounted for in the result
 * @return dictionary bytes for the referenced entries plus {@code Integer.BYTES} per selected position
 */
private long getSizeInBytesForSelectedPositions(boolean[] usedIds, int uniqueIds, int selectedPositions)
{
    long dictionaryBytes = dictionary.getPositionsSizeInBytes(usedIds, uniqueIds);

    boolean wholeDictionaryReferenced = uniqueIds == dictionary.getPositionCount();
    if (wholeDictionaryReferenced && this.sizeInBytes == -1) {
        // the selection already touches every entry, so the whole block does too
        this.uniqueIds = uniqueIds;
        this.sizeInBytes = dictionaryBytes + ((long) Integer.BYTES * positionCount);
    }

    long idBytes = (long) Integer.BYTES * selectedPositions;
    return dictionaryBytes + idBytes;
}
/**
 * Returns the retained size fixed at construction (instance overhead plus the ids array) plus the
 * dictionary's current retained size.
 */
@Override
public long getRetainedSizeInBytes()
{
    long dictionaryRetainedBytes = dictionary.getRetainedSizeInBytes();
    return retainedSizeInBytes + dictionaryRetainedBytes;
}
/**
 * Returns the dictionary's estimated data size for the dictionary entry that {@code position} refers to.
 */
@Override
public long getEstimatedDataSizeForStats(int position)
{
    int dictionaryPosition = getId(position);
    return dictionary.getEstimatedDataSizeForStats(dictionaryPosition);
}
@Override
public void retainedBytesForEachPart(ObjLongConsumer
© 2015 - 2025 Weber Informatics LLC | Privacy Policy