org.elasticsearch.search.aggregations.TopBucketBuilder Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
There is a newer version: 8.15.1
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.search.aggregations;

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation.Bucket;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.function.Consumer;

/**
 * Merges many buckets into the "top" buckets as sorted by {@link BucketOrder}.
 */
public abstract class TopBucketBuilder {
    /**
     * The number of buckets required before we switch to the
     * {@link BufferingTopBucketBuilder}. If we need fewer buckets we use
     * {@link PriorityQueueTopBucketBuilder}.
     * 
     * The value we picked for this boundary is fairly arbitrary, but it
     * is important that its bigger than the default size of the terms
     * aggregation. It's basically the amount of memory you are willing to
     * waste when reduce small terms aggregations so it shouldn't be too
     * large either. The value we have, {@code 1024}, preallocates about
     * 32k for the priority queue.
     */
    static final int USE_BUFFERING_BUILDER = 1024;

    /**
     * Create a {@link TopBucketBuilder} to build a list of the top buckets.
     * 

     * If there are few required results we use a {@link PriorityQueueTopBucketBuilder}
     * which is simpler and when the priority queue is full but allocates {@code size + 1}
     * slots in an array. If there are many required results we prefer a
     * {@link BufferingTopBucketBuilder} which doesn't preallocate and is faster for the
     * first {@code size} results. But it's a little slower when the priority queue is full.
     * 

     * It's important for this not to preallocate a bunch of memory when
     * {@code size} is very very large because this backs the reduction of the {@code terms}
     * aggregation and folks often set the {@code size} of that to something quite large.
     * The choice in the paragraph above handles this case.
     *
     * @param size the requested size of the list
     * @param order the sort order of the buckets
     * @param nonCompetitive called with non-competitive buckets
     */
    public static  TopBucketBuilder build(
        int size,
        BucketOrder order,
        Consumer> nonCompetitive
    ) {
        if (size < USE_BUFFERING_BUILDER) {
            return new PriorityQueueTopBucketBuilder<>(size, order, nonCompetitive);
        }
        return new BufferingTopBucketBuilder<>(size, order, nonCompetitive);
    }

    protected final Consumer> nonCompetitive;

    private TopBucketBuilder(Consumer> nonCompetitive) {
        this.nonCompetitive = nonCompetitive;
    }

    /**
     * Add a bucket if it is competitive. If there isn't space but the
     * bucket is competitive then this will drop the least competitive bucket
     * to make room for the new bucket.
     * 
     * Instead of operating on complete buckets we this operates on a
     * wrapper containing what we need to merge the buckets called
     * {@link DelayedBucket}. We can evaluate some common sort criteria
     * directly on the {@linkplain DelayedBucket}s so we only need to
     * merge exactly the sub-buckets we need.
     */
    public abstract void add(DelayedBucket bucket);

    /**
     * Return the most competitive buckets sorted by the comparator.
     */
    public abstract List build();

    /**
     * Collects the "top" buckets by adding them directly to a {@link PriorityQueue}.
     * This is always going to be faster than {@link BufferingTopBucketBuilder}
     * but it requires allocating an array of {@code size + 1}.
     */
    static class PriorityQueueTopBucketBuilder extends TopBucketBuilder {
        private final PriorityQueue> queue;

        PriorityQueueTopBucketBuilder(int size, BucketOrder order, Consumer> nonCompetitive) {
            super(nonCompetitive);
            if (size >= ArrayUtil.MAX_ARRAY_LENGTH) {
                throw new IllegalArgumentException("can't reduce more than [" + ArrayUtil.MAX_ARRAY_LENGTH + "] buckets");
            }
            queue = new PriorityQueue>(size) {
                private final Comparator> comparator = order.delayedBucketComparator();

                @Override
                protected boolean lessThan(DelayedBucket a, DelayedBucket b) {
                    return comparator.compare(a, b) > 0;
                }
            };
        }

        @Override
        public void add(DelayedBucket bucket) {
            DelayedBucket removed = queue.insertWithOverflow(bucket);
            if (removed != null) {
                nonCompetitive.accept(removed);
                removed.nonCompetitive();
            }
        }

        @Override
        public List build() {
            List result = new ArrayList<>(queue.size());
            for (int i = queue.size() - 1; i >= 0; i--) {
                result.add(queue.pop().reduced());
            }
            Collections.reverse(result);
            return result;
        }
    }

    /**
     * Collects the "top" buckets by adding them to a {@link List} that grows
     * as more buckets arrive and is converting into a
     * {@link PriorityQueueTopBucketBuilder} when {@code size} buckets arrive.
     */
    private static class BufferingTopBucketBuilder extends TopBucketBuilder {
        private final int size;
        private final BucketOrder order;

        private List> buffer;
        private PriorityQueueTopBucketBuilder next;

        BufferingTopBucketBuilder(int size, BucketOrder order, Consumer> nonCompetitive) {
            super(nonCompetitive);
            this.size = size;
            this.order = order;
            buffer = new ArrayList<>();
        }

        @Override
        public void add(DelayedBucket bucket) {
            if (next != null) {
                assert buffer == null;
                next.add(bucket);
                return;
            }
            buffer.add(bucket);
            if (buffer.size() < size) {
                return;
            }
            next = new PriorityQueueTopBucketBuilder<>(size, order, nonCompetitive);
            for (DelayedBucket b : buffer) {
                next.queue.add(b);
            }
            buffer = null;
        }

        @Override
        public List build() {
            if (next != null) {
                assert buffer == null;
                return next.build();
            }
            List result = new ArrayList<>(buffer.size());
            for (DelayedBucket b : buffer) {
                result.add(b.reduced());
            }
            result.sort(order.comparator());
            return result;
        }
    }
}