org.elasticsearch.search.aggregations.TopBucketBuilder Maven / Gradle / Ivy
Show all versions of elasticsearch Show documentation
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.search.aggregations;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation.Bucket;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.function.Consumer;
/**
* Merges many buckets into the "top" buckets as sorted by {@link BucketOrder}.
*
* Instances come from {@link #build(int, BucketOrder, Consumer)}; callers then
* feed buckets in with {@link #add} and collect the result with {@link #build()}.
*
* NOTE(review): generic type arguments appear to have been stripped from this
* copy of the file (see {@code Consumer>} and {@code PriorityQueue>} below).
* Restore them from the upstream source before compiling.
*/
public abstract class TopBucketBuilder {
/**
* The number of buckets required before we switch to the
* {@link BufferingTopBucketBuilder}. If we need fewer buckets we use
* {@link PriorityQueueTopBucketBuilder}.
*
* The value we picked for this boundary is fairly arbitrary, but it
* is important that it's bigger than the default size of the terms
* aggregation. It's basically the amount of memory you are willing to
* waste when reducing small terms aggregations so it shouldn't be too
* large either. The value we have, {@code 1024}, preallocates about
* 32k for the priority queue.
*/
static final int USE_BUFFERING_BUILDER = 1024;
/**
* Create a {@link TopBucketBuilder} to build a list of the top buckets.
*
* If there are few required results we use a {@link PriorityQueueTopBucketBuilder}
* which is simpler and faster when the priority queue is full but allocates
* {@code size + 1} slots in an array. If there are many required results we prefer a
* {@link BufferingTopBucketBuilder} which doesn't preallocate and is faster for the
* first {@code size} results. But it's a little slower when the priority queue is full.
*
* It's important for this not to preallocate a bunch of memory when
* {@code size} is very very large because this backs the reduction of the {@code terms}
* aggregation and folks often set the {@code size} of that to something quite large.
* The choice in the paragraph above handles this case.
*
* @param size the requested size of the list
* @param order the sort order of the buckets
* @param nonCompetitive called with non-competitive buckets
* @return a {@link PriorityQueueTopBucketBuilder} when {@code size} is below
*         {@link #USE_BUFFERING_BUILDER}, otherwise a {@link BufferingTopBucketBuilder}
*/
public static TopBucketBuilder build(
int size,
BucketOrder order,
Consumer> nonCompetitive
) {
if (size < USE_BUFFERING_BUILDER) {
return new PriorityQueueTopBucketBuilder<>(size, order, nonCompetitive);
}
return new BufferingTopBucketBuilder<>(size, order, nonCompetitive);
}
/**
* Callback that receives every bucket pushed out of the priority queue by
* {@link PriorityQueueTopBucketBuilder#add}.
*/
protected final Consumer> nonCompetitive;
/**
* Private so that only the implementations nested in this file can extend
* the class; everyone else goes through {@link #build(int, BucketOrder, Consumer)}.
*
* @param nonCompetitive called with non-competitive buckets
*/
private TopBucketBuilder(Consumer> nonCompetitive) {
this.nonCompetitive = nonCompetitive;
}
/**
* Add a bucket if it is competitive. If there isn't space but the
* bucket is competitive then this will drop the least competitive bucket
* to make room for the new bucket.
*
* Instead of operating on complete buckets this operates on a
* wrapper containing what we need to merge the buckets called
* {@link DelayedBucket}. We can evaluate some common sort criteria
* directly on the {@linkplain DelayedBucket}s so we only need to
* merge exactly the sub-buckets we need.
*
* @param bucket the candidate bucket
*/
public abstract void add(DelayedBucket bucket);
/**
* Return the most competitive buckets sorted by the comparator.
*
* @return the retained buckets, each obtained through {@code DelayedBucket#reduced()}
*/
public abstract List build();
/**
* Collects the "top" buckets by adding them directly to a {@link PriorityQueue}.
* This is always going to be faster than {@link BufferingTopBucketBuilder}
* but it requires allocating an array of {@code size + 1}.
*/
static class PriorityQueueTopBucketBuilder extends TopBucketBuilder {
// Bounded queue of the buckets retained so far. Package-visible within this
// file so BufferingTopBucketBuilder can fill it directly when it hands over.
private final PriorityQueue> queue;
/**
* @param size maximum number of buckets to keep
* @param order supplies the comparator used to rank {@link DelayedBucket}s
* @param nonCompetitive called with every bucket evicted from the queue
* @throws IllegalArgumentException if {@code size} is at least
*         {@code ArrayUtil.MAX_ARRAY_LENGTH}
*/
PriorityQueueTopBucketBuilder(int size, BucketOrder order, Consumer> nonCompetitive) {
super(nonCompetitive);
if (size >= ArrayUtil.MAX_ARRAY_LENGTH) {
throw new IllegalArgumentException("can't reduce more than [" + ArrayUtil.MAX_ARRAY_LENGTH + "] buckets");
}
queue = new PriorityQueue>(size) {
private final Comparator> comparator = order.delayedBucketComparator();
// Inverted relative to the comparator: a is "less than" b when the comparator
// sorts a AFTER b. Lucene's PriorityQueue keeps its least element on top and
// drops it on overflow, so the bucket that sorts last is the one evicted.
@Override
protected boolean lessThan(DelayedBucket a, DelayedBucket b) {
return comparator.compare(a, b) > 0;
}
};
}
/**
* Offers the bucket to the queue. Whatever the queue rejects or evicts is
* passed to the {@code nonCompetitive} callback and then told about it via
* {@code DelayedBucket#nonCompetitive()}.
*/
@Override
public void add(DelayedBucket bucket) {
// Returns null while the queue still has room; otherwise returns the loser,
// which may be the bucket we just offered.
DelayedBucket removed = queue.insertWithOverflow(bucket);
if (removed != null) {
nonCompetitive.accept(removed);
// NOTE(review): DelayedBucket#nonCompetitive() is defined outside this file;
// presumably it lets the bucket release what it holds - confirm there.
removed.nonCompetitive();
}
}
/**
* Drains the queue into a list ordered by the comparator. The queue is empty
* afterwards.
*/
@Override
public List build() {
List result = new ArrayList<>(queue.size());
// queue.size() is read once to seed the counter; each pop() shrinks the queue,
// so the counter (not size()) decides how many times we pop.
for (int i = queue.size() - 1; i >= 0; i--) {
result.add(queue.pop().reduced());
}
// pop() yields the bucket that sorts last first (see lessThan above), so the
// list is built back to front and flipped here.
Collections.reverse(result);
return result;
}
}
/**
* Collects the "top" buckets by adding them to a {@link List} that grows
* as more buckets arrive and is converted into a
* {@link PriorityQueueTopBucketBuilder} when {@code size} buckets arrive.
*/
private static class BufferingTopBucketBuilder extends TopBucketBuilder {
// Number of buckets to keep; also the point at which buffering stops.
private final int size;
// Kept so the queue-backed builder can be created later and so the buffered
// path in build() can sort its result.
private final BucketOrder order;
// Buckets seen so far. Set to null once {@code next} takes over.
private List> buffer;
// Null until {@code size} buckets have been buffered; after that all work is delegated to it.
private PriorityQueueTopBucketBuilder next;
/**
* @param size the requested number of buckets
* @param order the sort order of the buckets
* @param nonCompetitive forwarded to the {@link PriorityQueueTopBucketBuilder}
*        created once the buffer fills up
*/
BufferingTopBucketBuilder(int size, BucketOrder order, Consumer> nonCompetitive) {
super(nonCompetitive);
this.size = size;
this.order = order;
buffer = new ArrayList<>();
}
/**
* Buffers the bucket, or delegates to the queue-backed builder once the switch
* has happened. The switch happens on the call that brings the buffer up to
* {@code size} buckets.
*/
@Override
public void add(DelayedBucket bucket) {
if (next != null) {
assert buffer == null;
next.add(bucket);
return;
}
buffer.add(bucket);
if (buffer.size() < size) {
return;
}
next = new PriorityQueueTopBucketBuilder<>(size, order, nonCompetitive);
// Buffered buckets go in with queue.add(), not insertWithOverflow(), so the
// nonCompetitive callback is not involved in the handoff.
for (DelayedBucket b : buffer) {
next.queue.add(b);
}
buffer = null;
}
/**
* Returns the collected buckets. If the switch to the queue-backed builder
* happened its result is returned; otherwise every buffered bucket is reduced
* and the list is sorted with {@code order.comparator()}.
*/
@Override
public List build() {
if (next != null) {
assert buffer == null;
return next.build();
}
List result = new ArrayList<>(buffer.size());
for (DelayedBucket b : buffer) {
result.add(b.reduced());
}
result.sort(order.comparator());
return result;
}
}
}