// org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds (Maven / Gradle / Ivy)
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.LongHash;
import org.elasticsearch.common.util.LongLongHash;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.search.aggregations.CardinalityUpperBound;
import java.util.Iterator;
import java.util.Locale;
import java.util.NoSuchElementException;
import java.util.TreeSet;
/**
* Maps owning bucket ordinals and long bucket keys to bucket ordinals.
*/
public abstract class LongKeyedBucketOrds implements Releasable {
/**
 * Build a {@link LongKeyedBucketOrds} whose values have unknown bounds.
 *
 * @param bigArrays   handed to the hash that backs the chosen implementation
 * @param cardinality this should come from the owning aggregation, and is used as an upper bound
 *                    on the owning bucket ordinals
 * @return a {@link FromSingle} when the upper bound is below two, otherwise a {@link FromMany}
 */
public static LongKeyedBucketOrds build(BigArrays bigArrays, CardinalityUpperBound cardinality) {
    return cardinality.map(upperBound -> {
        if (upperBound < 2) {
            // At most one owning bucket, so the owning ordinal never needs to be stored.
            return new FromSingle(bigArrays);
        }
        return new FromMany(bigArrays);
    });
}
/**
 * Build a {@link LongKeyedBucketOrds} whose values have known bounds.
 * <p>
 * When both the owning bucket ordinal and every value in {@code [min, max]} fit together
 * in a single {@code long}, the packed {@link FromManySmall} implementation is chosen.
 *
 * @param bigArrays   handed to the hash that backs the chosen implementation
 * @param cardinality this should come from the owning aggregation, and is used as an upper bound
 *                    on the owning bucket ordinals
 * @param min         the minimum key value for this aggregation
 * @param max         the maximum key value for this aggregation
 */
public static LongKeyedBucketOrds buildForValueRange(BigArrays bigArrays, CardinalityUpperBound cardinality, long min, long max) {
    return cardinality.map((int upperBound) -> {
        if (upperBound < 2) {
            return new FromSingle(bigArrays);
        }
        // The bound tops out at maxint: seeing maxint means "could be anything above maxint",
        // so packing is only attempted for a smaller bound and non-negative values.
        boolean packingCandidate = min >= 0 && upperBound != Integer.MAX_VALUE;
        if (packingCandidate) {
            int valueBitsAvailable = Long.numberOfLeadingZeros(upperBound);
            int valueBitsNeeded = 64 - Long.numberOfLeadingZeros(max);
            if (valueBitsNeeded < valueBitsAvailable) {
                // There is enough space in a long to contain both the owning bucket and the entire range of values
                return new FromManySmall(bigArrays, valueBitsAvailable);
            }
        }
        return new FromMany(bigArrays);
    });
}
/**
 * Naturally ordered set of every key reported by {@link #get(long)}. It is built lazily by
 * {@link #keyOrderedIterator(long)} and dropped again by {@link #close()}.
 */
private TreeSet<Long> keySet = null;

/** Private, so only code inside this top-level class (the nested implementations) can invoke it. */
private LongKeyedBucketOrds() {}
/**
* Add the {@code owningBucketOrd, value} pair. Return the ord for
* their bucket if they have yet to be added, or {@code -1-ord}
* if they were already present.
*
* @param owningBucketOrd ordinal of the bucket that owns {@code value}
* @param value the long key to register under {@code owningBucketOrd}
* @return the newly assigned bucket ord, or {@code -1-ord} if the pair was already present
*/
public abstract long add(long owningBucketOrd, long value);
/**
* Count the buckets in {@code owningBucketOrd}.
*
* Some aggregations expect this to be fast but most wouldn't
* mind particularly if it weren't.
*
* @param owningBucketOrd the owning bucket whose buckets are counted
* @return the number of buckets collected under {@code owningBucketOrd}
*/
public abstract long bucketsInOrd(long owningBucketOrd);
/**
* Find the {@code owningBucketOrd, value} pair. Return the ord for
* their bucket if they have been added or {@code -1} if they haven't.
*
* @param owningBucketOrd ordinal of the bucket that owns {@code value}
* @param value the long key to look up
* @return the bucket ord of the pair, or {@code -1} if it was never added
*/
public abstract long find(long owningBucketOrd, long value);
/**
* Returns the value currently associated with the bucket ordinal.
* Only the key value is returned; the owning bucket ordinal is not part of the result.
*
* @param ordinal a bucket ordinal
* @return the long key stored for {@code ordinal}
*/
public abstract long get(long ordinal);
/**
* The number of collected buckets.
*
* @return the count of buckets across all owning bucket ordinals
*/
public abstract long size();
/**
* The maximum possible used {@code owningBucketOrd}.
* The single-bucket implementation always reports {@code 0}; the multi-bucket
* implementations scan every collected bucket and report {@code -1} when nothing
* has been collected.
*
* @return the largest owning bucket ordinal in use
*/
public abstract long maxOwningBucketOrd();
/**
* Description used in profile results.
* NOTE: the method name is a misspelling of "describe"; it is part of the public
* API of this class and is overridden under this name by every implementation.
*
* @return a short human readable description of the implementation in use
*/
public abstract String decribe();
/**
* Build an iterator for buckets inside {@code owningBucketOrd} in order
* of increasing ord.
*
* When this is first returned it is "unpositioned" and you must call
* {@link BucketOrdsEnum#next()} to move it to the first value.
*
* @param owningBucketOrd the owning bucket whose buckets are iterated
* @return an unpositioned enum over the buckets of {@code owningBucketOrd}
*/
public abstract BucketOrdsEnum ordsEnum(long owningBucketOrd);
/**
 * Return an iterator for all keys in the given owning bucket, ordered in natural sort order.
 * This is suitable for aligning buckets across different instances of an aggregation.
 * <p>
 * The sorted set of all keys is computed on the first call and then reused until
 * {@link #close()} is called, so keys added after the first call are not reflected
 * by iterators created later.
 *
 * @param owningBucketOrd Only return keys that occurred under this owning bucket
 * @return a sorted iterator of long key values
 */
public Iterator<Long> keyOrderedIterator(long owningBucketOrd) {
    if (keySet == null) {
        // TreeSet's contract includes a naturally ordered iterator
        keySet = new TreeSet<>();
        for (long ord = 0; ord < size(); ord++) {
            keySet.add(this.get(ord));
        }
    }
    Iterator<Long> filtered = new Iterator<>() {
        // Walks every key of every owning bucket; next() keeps only those found under filterOrd.
        final Iterator<Long> wrapped = keySet.iterator();
        final long filterOrd = owningBucketOrd;
        // Look-ahead slot: the value the following call to next() will hand out.
        long next;
        // Starts out true so that the priming call below is allowed to run.
        boolean hasNext = true;

        @Override
        public boolean hasNext() {
            return hasNext;
        }

        @Override
        public Long next() {
            if (hasNext == false) {
                throw new NoSuchElementException();
            }
            long current = next;
            hasNext = false;
            // Advance the look-ahead to the next key that exists under the requested owning bucket.
            while (wrapped.hasNext()) {
                long candidate = wrapped.next();
                if (find(filterOrd, candidate) != -1) {
                    next = candidate;
                    hasNext = true;
                    break;
                }
            }
            return current;
        }
    };
    filtered.next(); // Prime the first actual value; the placeholder it returns is discarded
    return filtered;
}
/**
 * Drops the cached sorted key set. Implementations override this to also release
 * their backing hash and call {@code super.close()}.
 */
@Override
public void close() {
    keySet = null;
}
/**
 * A cursor over the buckets inside a particular {@code owningBucketOrd}.
 */
public interface BucketOrdsEnum {
    /**
     * Move on to the next value.
     *
     * @return {@code true} if there *is* a next value, {@code false} if there isn't
     */
    boolean next();

    /**
     * The current value.
     */
    long value();

    /**
     * The ordinal of the current value.
     */
    long ord();

    /**
     * A {@linkplain BucketOrdsEnum} with nothing in it: it never advances and
     * both accessors report {@code 0}.
     */
    BucketOrdsEnum EMPTY = new BucketOrdsEnum() {
        @Override
        public long value() {
            return 0;
        }

        @Override
        public long ord() {
            return 0;
        }

        @Override
        public boolean next() {
            return false;
        }
    };
}
/**
 * Implementation for the case where everything is collected from a single owning
 * bucket: the owning ordinal is asserted to be {@code 0} and only the value is hashed.
 */
public static class FromSingle extends LongKeyedBucketOrds {
    private final LongHash ords;

    public FromSingle(BigArrays bigArrays) {
        this.ords = new LongHash(1, bigArrays);
    }

    @Override
    public long add(long owningBucketOrd, long value) {
        // This is in the critical path for collecting most aggs. Be careful of performance.
        assert owningBucketOrd == 0;
        return ords.add(value);
    }

    @Override
    public long find(long owningBucketOrd, long value) {
        assert owningBucketOrd == 0;
        return ords.find(value);
    }

    @Override
    public long get(long ordinal) {
        return ords.get(ordinal);
    }

    @Override
    public long bucketsInOrd(long owningBucketOrd) {
        assert owningBucketOrd == 0;
        // With one owning bucket, every collected bucket belongs to it.
        return ords.size();
    }

    @Override
    public long size() {
        return ords.size();
    }

    @Override
    public long maxOwningBucketOrd() {
        return 0;
    }

    @Override
    public String decribe() {
        return "single bucket ords";
    }

    @Override
    public BucketOrdsEnum ordsEnum(long owningBucketOrd) {
        assert owningBucketOrd == 0;
        return new BucketOrdsEnum() {
            private long position = -1;
            private long currentValue;

            @Override
            public boolean next() {
                // The position keeps advancing even once the end has been reached.
                position++;
                if (position < ords.size()) {
                    currentValue = ords.get(position);
                    return true;
                }
                return false;
            }

            @Override
            public long ord() {
                return position;
            }

            @Override
            public long value() {
                return currentValue;
            }
        };
    }

    @Override
    public void close() {
        super.close();
        ords.close();
    }
}
/**
 * Implementation that works properly when collecting from many buckets. Each
 * {@code (owningBucketOrd, value)} pair is stored as the two keys of a {@link LongLongHash}.
 */
public static class FromMany extends LongKeyedBucketOrds {
    private final LongLongHash ords;

    public FromMany(BigArrays bigArrays) {
        this.ords = new LongLongHash(2, bigArrays);
    }

    @Override
    public long add(long owningBucketOrd, long value) {
        // This is in the critical path for collecting most aggs. Be careful of performance.
        return ords.add(owningBucketOrd, value);
    }

    @Override
    public long find(long owningBucketOrd, long value) {
        return ords.find(owningBucketOrd, value);
    }

    @Override
    public long get(long ordinal) {
        // The second key is the value; the first key is the owning bucket ordinal.
        return ords.getKey2(ordinal);
    }

    @Override
    public long bucketsInOrd(long owningBucketOrd) {
        // TODO it'd be faster to count the number of buckets in a list of these ords rather than one at a time
        long matches = 0;
        for (long id = ords.size() - 1; id >= 0; id--) {
            if (owningBucketOrd == ords.getKey1(id)) {
                matches++;
            }
        }
        return matches;
    }

    @Override
    public long size() {
        return ords.size();
    }

    @Override
    public long maxOwningBucketOrd() {
        // TODO this is fairly expensive to compute. Can we avoid needing it?
        long largest = -1;
        for (long id = ords.size() - 1; id >= 0; id--) {
            long owner = ords.getKey1(id);
            if (owner > largest) {
                largest = owner;
            }
        }
        return largest;
    }

    @Override
    public String decribe() {
        return "many bucket ords";
    }

    @Override
    public BucketOrdsEnum ordsEnum(long owningBucketOrd) {
        // TODO it'd be faster to iterate many ords at once rather than one at a time
        return new BucketOrdsEnum() {
            private long position = -1;
            private long currentValue;

            @Override
            public boolean next() {
                // Skip forward until a bucket owned by the requested ordinal shows up.
                for (position++; position < ords.size(); position++) {
                    if (ords.getKey1(position) == owningBucketOrd) {
                        currentValue = ords.getKey2(position);
                        return true;
                    }
                }
                return false;
            }

            @Override
            public long ord() {
                return position;
            }

            @Override
            public long value() {
                return currentValue;
            }
        };
    }

    @Override
    public void close() {
        super.close();
        ords.close();
    }
}
/**
 * Implementation that packs the {@code owningBucketOrd} into the top
 * bits of a {@code long} and uses the bottom bits for the value.
 */
public static class FromManySmall extends LongKeyedBucketOrds {
    private final LongHash ords;
    /** Number of low bits reserved for the value; the owning ordinal is shifted left by this amount. */
    private final int owningBucketOrdShift;
    /** Selects the high bits that hold the owning bucket ordinal; its complement selects the value bits. */
    private final long owningBucketOrdMask;

    /**
     * @param bigArrays            handed to the backing {@link LongHash}
     * @param owningBucketOrdShift how many low bits are left for the value
     */
    public FromManySmall(BigArrays bigArrays, int owningBucketOrdShift) {
        ords = new LongHash(2, bigArrays);
        this.owningBucketOrdShift = owningBucketOrdShift;
        this.owningBucketOrdMask = -1L << owningBucketOrdShift;
    }

    /** Packs the pair into one {@code long} without checking that either part fits. */
    private long encode(long owningBucketOrd, long value) {
        // This is in the critical path for collecting some aggs. Be careful of performance.
        return (owningBucketOrd << owningBucketOrdShift) | value;
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalArgumentException if {@code owningBucketOrd} or {@code value} does not
     *         survive being packed, i.e. either one does not fit in its share of the bits
     */
    @Override
    public long add(long owningBucketOrd, long value) {
        // This is in the critical path for collecting lots of aggs. Be careful of performance.
        long enc = encode(owningBucketOrd, value);
        // Either part failing to round-trip means bits were lost or overlapped, which would
        // silently alias a different pair, so one failure is enough to reject the input.
        if (owningBucketOrd != (enc >>> owningBucketOrdShift) || (enc & ~owningBucketOrdMask) != value) {
            throw new IllegalArgumentException(
                String.format(
                    Locale.ROOT,
                    "[%s] and [%s] must fit in [%s..%s] bits",
                    owningBucketOrd,
                    value,
                    64 - owningBucketOrdShift,
                    owningBucketOrdShift
                )
            );
        }
        return ords.add(enc);
    }

    @Override
    public long find(long owningBucketOrd, long value) {
        // An owning ordinal that needs more bits than are reserved for it can't have been added.
        if (Long.numberOfLeadingZeros(owningBucketOrd) < owningBucketOrdShift) {
            return -1;
        }
        // Likewise for a value that spills into the owning ordinal's bits.
        if ((value & owningBucketOrdMask) != 0) {
            return -1;
        }
        return ords.find(encode(owningBucketOrd, value));
    }

    @Override
    public long get(long ordinal) {
        // Strip the owning ordinal, leaving only the value bits.
        return ords.get(ordinal) & ~owningBucketOrdMask;
    }

    @Override
    public long bucketsInOrd(long owningBucketOrd) {
        // TODO it'd be faster to count the number of buckets in a list of these ords rather than one at a time
        if (Long.numberOfLeadingZeros(owningBucketOrd) < owningBucketOrdShift) {
            return 0;
        }
        long count = 0;
        long enc = owningBucketOrd << owningBucketOrdShift;
        for (long i = 0; i < ords.size(); i++) {
            if ((ords.get(i) & owningBucketOrdMask) == enc) {
                count++;
            }
        }
        return count;
    }

    @Override
    public long size() {
        return ords.size();
    }

    @Override
    public long maxOwningBucketOrd() {
        // TODO this is fairly expensive to compute. Can we avoid needing it?
        long max = -1;
        for (long i = 0; i < ords.size(); i++) {
            max = Math.max(max, (ords.get(i) & owningBucketOrdMask) >>> owningBucketOrdShift);
        }
        return max;
    }

    @Override
    public String decribe() {
        return "many bucket ords packed using [" + (64 - owningBucketOrdShift) + "/" + owningBucketOrdShift + "] bits";
    }

    @Override
    public BucketOrdsEnum ordsEnum(long owningBucketOrd) {
        // TODO it'd be faster to iterate many ords at once rather than one at a time
        if (Long.numberOfLeadingZeros(owningBucketOrd) < owningBucketOrdShift) {
            return BucketOrdsEnum.EMPTY;
        }
        final long encodedOwningBucketOrd = owningBucketOrd << owningBucketOrdShift;
        return new BucketOrdsEnum() {
            private long ord = -1;
            private long value;

            @Override
            public boolean next() {
                while (true) {
                    ord++;
                    if (ord >= ords.size()) {
                        return false;
                    }
                    long encoded = ords.get(ord);
                    // Compare only the high bits to select buckets of the requested owning ordinal.
                    if ((encoded & owningBucketOrdMask) == encodedOwningBucketOrd) {
                        value = encoded & ~owningBucketOrdMask;
                        return true;
                    }
                }
            }

            @Override
            public long value() {
                return value;
            }

            @Override
            public long ord() {
                return ord;
            }
        };
    }

    @Override
    public void close() {
        super.close();
        ords.close();
    }
}
}