// org.elasticsearch.compute.aggregation.blockhash.BlockHash Maven / Gradle / Ivy
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.compute.aggregation.blockhash;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.BitArray;
import org.elasticsearch.common.util.BytesRefHash;
import org.elasticsearch.common.util.LongHash;
import org.elasticsearch.common.util.LongLongHash;
import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction;
import org.elasticsearch.compute.aggregation.SeenGroupIds;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.ElementType;
import org.elasticsearch.compute.data.IntBlock;
import org.elasticsearch.compute.data.IntVector;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.ReleasableIterator;
import java.util.Iterator;
import java.util.List;
/**
 * A specialized hash table implementation that maps values of a {@link Block} to ids (in longs).
 * Concrete behavior lives in the permitted subclasses; this base class holds the shared
 * {@link BlockFactory}, the factory methods that pick a subclass, and helpers for converting
 * the ordinals returned by {@link LongHash}, {@link LongLongHash} or {@link BytesRefHash}.
 *
 * @see LongHash
 * @see BytesRefHash
 */
public abstract sealed class BlockHash implements Releasable, SeenGroupIds //
    permits BooleanBlockHash, BytesRefBlockHash, DoubleBlockHash, IntBlockHash, LongBlockHash, BytesRef3BlockHash, //
    NullBlockHash, PackedValuesBlockHash, BytesRefLongBlockHash, LongLongBlockHash, TimeSeriesBlockHash {

    /** Factory made available to subclasses. */
    protected final BlockFactory blockFactory;

    BlockHash(BlockFactory blockFactory) {
        this.blockFactory = blockFactory;
    }

    /**
     * Add all values for the "group by" columns in the page to the hash and
     * pass the ordinals to the provided {@link GroupingAggregatorFunction.AddInput}.
     */
    public abstract void add(Page page, GroupingAggregatorFunction.AddInput addInput);

    /**
     * Lookup all values for the "group by" columns in the page to the hash and return an
     * {@link Iterator} of the values. The sum of {@link IntBlock#getPositionCount} for
     * all blocks returned by the iterator will equal {@link Page#getPositionCount} but
     * will "target" a size of {@code targetBlockSize}.
     * <p>
     * The returned {@link ReleasableIterator} may retain a reference to {@link Block}s
     * inside the {@link Page}. Close it to release those references.
     * </p>
     */
    public abstract ReleasableIterator<IntBlock> lookup(Page page, ByteSizeValue targetBlockSize);

    /**
     * Returns the {@link Block}s that contain all the keys that are inserted by {@link #add}.
     */
    public abstract Block[] getKeys();

    /**
     * The grouping ids that are not empty. We use this because some block hashes reserve
     * space for grouping ids and then don't end up using them. For example,
     * {@link BooleanBlockHash} does this by always assigning {@code false} to {@code 0}
     * and {@code true} to {@code 1}. It's only after collection when we
     * know if there actually were any {@code true} or {@code false} values received.
     */
    public abstract IntVector nonEmpty();

    // TODO merge with nonEmpty
    @Override
    public abstract BitArray seenGroupIds(BigArrays bigArrays);

    /**
     * Describes one "group by" column: the channel it is read from and the
     * {@link ElementType} of its values.
     */
    public record GroupSpec(int channel, ElementType elementType) {}

    /**
     * Creates a specialized hash table that maps one or more {@link Block}s to ids.
     * <p>
     * Selection order: a single group gets the hash dedicated to its element type;
     * exactly three {@link ElementType#BYTES_REF} groups get {@link BytesRef3BlockHash};
     * two-group LONG/BYTES_REF combinations get a dedicated hash only when
     * {@code allowBrokenOptimizations} is set; everything else falls back to
     * {@link PackedValuesBlockHash}.
     * </p>
     *
     * @param groups        the "group by" columns to hash
     * @param blockFactory  factory handed to the created hash
     * @param emitBatchSize maximum batch size to be emitted when handling combinatorial
     *                      explosion of groups caused by multivalued fields
     * @param allowBrokenOptimizations true to allow optimizations with bad null handling. We will fix their
     *                                 null handling and remove this flag, but we need to disable these in
     *                                 production until we can. And this lets us continue to compile and
     *                                 test them.
     * @throws IllegalArgumentException if there is a single group whose element type has no dedicated hash
     */
    public static BlockHash build(List<GroupSpec> groups, BlockFactory blockFactory, int emitBatchSize, boolean allowBrokenOptimizations) {
        if (groups.size() == 1) {
            return newForElementType(groups.get(0).channel(), groups.get(0).elementType(), blockFactory);
        }
        if (groups.size() == 3 && groups.stream().allMatch(g -> g.elementType() == ElementType.BYTES_REF)) {
            return new BytesRef3BlockHash(
                blockFactory,
                groups.get(0).channel(),
                groups.get(1).channel(),
                groups.get(2).channel(),
                emitBatchSize
            );
        }
        // The two-column specializations are only used when the caller explicitly opts in.
        if (allowBrokenOptimizations && groups.size() == 2) {
            var g1 = groups.get(0);
            var g2 = groups.get(1);
            if (g1.elementType() == ElementType.LONG && g2.elementType() == ElementType.LONG) {
                return new LongLongBlockHash(blockFactory, g1.channel(), g2.channel(), emitBatchSize);
            }
            if (g1.elementType() == ElementType.BYTES_REF && g2.elementType() == ElementType.LONG) {
                return new BytesRefLongBlockHash(blockFactory, g1.channel(), g2.channel(), false, emitBatchSize);
            }
            if (g1.elementType() == ElementType.LONG && g2.elementType() == ElementType.BYTES_REF) {
                // Channels are swapped so the BYTES_REF channel is always passed first; the flag is
                // true only for this (LONG, BYTES_REF) ordering.
                // NOTE(review): presumably it tells the hash to emit keys in the original order - confirm.
                return new BytesRefLongBlockHash(blockFactory, g2.channel(), g1.channel(), true, emitBatchSize);
            }
        }
        return new PackedValuesBlockHash(groups, blockFactory, emitBatchSize);
    }

    /**
     * Temporary method to build a {@link PackedValuesBlockHash}.
     */
    public static BlockHash buildPackedValuesBlockHash(List<GroupSpec> groups, BlockFactory blockFactory, int emitBatchSize) {
        return new PackedValuesBlockHash(groups, blockFactory, emitBatchSize);
    }

    /**
     * Creates a specialized hash table that maps a {@link Block} of the given input element type to ids.
     *
     * @throws IllegalArgumentException if {@code type} has no dedicated hash implementation
     */
    private static BlockHash newForElementType(int channel, ElementType type, BlockFactory blockFactory) {
        return switch (type) {
            case NULL -> new NullBlockHash(channel, blockFactory);
            case BOOLEAN -> new BooleanBlockHash(channel, blockFactory);
            case INT -> new IntBlockHash(channel, blockFactory);
            case LONG -> new LongBlockHash(channel, blockFactory);
            case DOUBLE -> new DoubleBlockHash(channel, blockFactory);
            case BYTES_REF -> new BytesRefBlockHash(channel, blockFactory);
            default -> throw new IllegalArgumentException("unsupported grouping element type [" + type + "]");
        };
    }

    /**
     * Convert the result of calling {@link LongHash} or {@link LongLongHash}
     * or {@link BytesRefHash} or similar to a group ordinal. These hashes
     * return negative numbers if the value that was added has already been
     * seen. We don't use that and convert it back to the positive ord.
     *
     * @param ord the value returned by the hash; negative values encode {@code -1 - ordinal}
     * @return the non-negative group ordinal
     */
    public static long hashOrdToGroup(long ord) {
        if (ord < 0) { // already seen
            return -1 - ord;
        }
        return ord;
    }

    /**
     * Convert the result of calling {@link LongHash} or {@link LongLongHash}
     * or {@link BytesRefHash} or similar to a group ordinal, reserving {@code 0}
     * for null.
     *
     * @param ord the value returned by the hash
     * @return {@link #hashOrdToGroup(long)} shifted up by one so that {@code 0} stays free
     */
    public static long hashOrdToGroupNullReserved(long ord) {
        return hashOrdToGroup(ord) + 1;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy