org.elasticsearch.search.sort.BucketedSort Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
There is a newer version: 8.13.4
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.search.sort;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorable;
import org.elasticsearch.common.lucene.ScorerAware;
import org.elasticsearch.common.util.BigArray;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.BitArray;
import org.elasticsearch.common.util.DoubleArray;
import org.elasticsearch.common.util.FloatArray;
import org.elasticsearch.common.util.LongArray;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.search.DocValueFormat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;

import static java.util.Collections.emptyList;

/**
 * Type specialized sort implementations designed for use in aggregations.
 * Aggregations have a couple of super interesting characteristics:
 * 
 * They can have many, many buckets so this implementation backs to
 * {@link BigArrays} so it doesn't need to allocate any objects per bucket
 * and the circuit breaker in {@linkplain BigArrays} will automatically
 * track memory usage and abort execution if it grows too large.
 * Its fairly common for a bucket to be collected but not returned so
 * these implementations delay as much work as possible until collection
 * 
 * 
 * Every bucket is in one of two states: "gathering" or min/max "heap". While
 * "gathering" the next empty slot is stored in the "root" offset of the
 * bucket and collecting a value is just adding it in the next slot bumping
 * the tracking value at the root. So collecting values is {@code O(1)}.
 * Extracting the results in sorted order is {@code O(n * log n)} because,
 * well, sorting is {@code O(n * log n)}. When a bucket has collected
 * {@link #bucketSize} entries it is converted into a min "heap" in
 * {@code O(n)} time. Or into max heap, if {@link #order} is ascending.
 * 
 * 
 * Once a "heap", collecting a document is the heap-standard {@code O(log n)}
 * worst case. Critically, it is a very fast {@code O(1)} to check if a value
 * is competitive at all which, so long as buckets aren't hit in reverse
 * order, they mostly won't be. Extracting results in sorted order is still
 * {@code O(n * log n)}.
 * 
 * 
 * When we first collect a bucket we make sure that we've allocated enough
 * slots to hold all sort values for the entire bucket. In other words: the
 * storage is "dense" and we don't try to save space when storing partially
 * filled buckets.
 * 
 * 
 * We actually *oversize* the allocations
 * (like {@link BigArrays#overSize(long)}) to get amortized linear number
 * of allocations and to play well with our paged arrays.
 * 
 */
public abstract class BucketedSort implements Releasable {
    /**
     * Callbacks for storing extra data along with competitive sorts.
     */
    public interface ExtraData {
        /**
         * Swap the position of two bits of extra data.
         * 
         * Both parameters will have previously been loaded by
         * {@link Loader#loadFromDoc(long, int)} so the implementer shouldn't
         * need to grow the underlying storage to implement this.
         * 
         */
        void swap(long lhs, long rhs);

        /**
         * Prepare to load extra data from a leaf.
         */
        Loader loader(LeafReaderContext ctx) throws IOException;

        @FunctionalInterface
        interface Loader {
            /**
             * Load extra data from a doc.
             * 
             * Implementers should grow their underlying
             * storage to fit the {@code index}.
             * 
             */
            void loadFromDoc(long index, int doc) throws IOException;
        }
    }

    /**
     * An implementation of {@linkplain ExtraData} that does nothing.
     */
    public static final ExtraData NOOP_EXTRA_DATA = new ExtraData() {
        @Override
        public void swap(long lhs, long rhs) {}

        @Override
        public Loader loader(LeafReaderContext ctx) throws IOException {
            return (index, doc) -> {};
        }
    };

    protected final BigArrays bigArrays;
    private final SortOrder order;
    private final DocValueFormat format;
    private final int bucketSize;
    protected final ExtraData extra;
    /**
     * {@code true} if the bucket is in heap mode, {@code false} if
     * it is still gathering.
     */
    private final BitArray heapMode;

    protected BucketedSort(BigArrays bigArrays, SortOrder order, DocValueFormat format, int bucketSize, ExtraData extra) {
        this.bigArrays = bigArrays;
        this.order = order;
        this.format = format;
        this.bucketSize = bucketSize;
        this.extra = extra;
        heapMode = new BitArray(1, bigArrays);
    }

    /**
     * The order of the sort.
     */
    public final SortOrder getOrder() {
        return order;
    }

    /**
     * The format to use when presenting the values.
     */
    public final DocValueFormat getFormat() {
        return format;
    }

    /**
     * The number of values to store per bucket.
     */
    public int getBucketSize() {
        return bucketSize;
    }

    /**
     * Used with {@link BucketedSort#getValues(long, ResultBuilder)} to
     * build results from the sorting operation.
     */
    @FunctionalInterface
    public interface ResultBuilder {
        T build(long index, SortValue sortValue) throws IOException;
    }

    /**
     * Get the values for a bucket if it has been collected. If it hasn't
     * then returns an empty list.
     * @param builder builds results. See {@link ExtraData} for how to store
     *                data along side the sort for this to extract.
     */
    public final > List getValues(long bucket, ResultBuilder builder) throws IOException {
        long rootIndex = bucket * bucketSize;
        if (rootIndex >= values().size()) {
            // We've never seen this bucket.
            return emptyList();
        }
        long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
        long end = rootIndex + bucketSize;
        List result = new ArrayList<>(bucketSize);
        for (long index = start; index < end; index++) {
            result.add(builder.build(index, getValue(index)));
        }
        // TODO we usually have a heap here so we could use that to build the results sorted.
        result.sort(order.wrap(Comparator.naturalOrder()));
        return result;
    }

    /**
     * Get the values for a bucket if it has been collected. If it hasn't
     * then returns an empty array.
     */
    public final List getValues(long bucket) throws IOException {
        return getValues(bucket, (i, sv) -> sv);
    }

    /**
     * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
     */
    public boolean inHeapMode(long bucket) {
        return heapMode.get(bucket);
    }

    /**
     * Get the {@linkplain Leaf} implementation that'll do that actual collecting.
     * @throws IOException most implementations need to perform IO to prepare for each leaf
     */
    public abstract Leaf forLeaf(LeafReaderContext ctx) throws IOException;

    /**
     * Does this sort need scores? Most don't, but sorting on {@code _score} does.
     */
    public abstract boolean needsScores();

    /**
     * The {@linkplain BigArray} backing this sort.
     */
    protected abstract BigArray values();

    /**
     * Grow the {@linkplain BigArray} backing this sort to account for new buckets.
     * This will only be called if the array is too small.
     */
    protected abstract void growValues(long minSize);

    /**
     * Get the next index that should be "gathered" for a bucket rooted
     * at {@code rootIndex}.
     */
    protected abstract int getNextGatherOffset(long rootIndex);

    /**
     * Set the next index that should be "gathered" for a bucket rooted
     * at {@code rootIndex}.
     */
    protected abstract void setNextGatherOffset(long rootIndex, int offset);

    /**
     * Get the value at an index.
     */
    protected abstract SortValue getValue(long index);

    /**
     * {@code true} if the entry at index {@code lhs} is "better" than
     * the entry at {@code rhs}. "Better" in this means "lower" for
     * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
     */
    protected abstract boolean betterThan(long lhs, long rhs);

    /**
     * Swap the data at two indices.
     */
    protected abstract void swap(long lhs, long rhs);

    /**
     * Return a fairly human readable representation of the array backing the sort.
     * 
     * This is intentionally not a {@link #toString()} implementation because it'll
     * be quite slow.
     * 
     */
    protected final String debugFormat() {
        StringBuilder b = new StringBuilder();
        for (long index = 0; index < values().size(); index++) {
            if (index % bucketSize == 0) {
                b.append('\n').append(String.format(Locale.ROOT, "%20d", index / bucketSize)).append(":  ");
            }
            b.append(String.format(Locale.ROOT, "%20s", getValue(index))).append(' ');
        }
        return b.toString();
    }

    /**
     * Initialize the gather offsets after setting up values. Subclasses
     * should call this once, after setting up their {@link #values()}.
     */
    protected final void initGatherOffsets() {
        setNextGatherOffsets(0);
    }

    /**
     * Allocate storage for more buckets and store the "next gather offset"
     * for those new buckets.
     */
    private void grow(long minSize) {
        long oldMax = values().size() - 1;
        growValues(minSize);
        // Set the next gather offsets for all newly allocated buckets.
        setNextGatherOffsets(oldMax - (oldMax % getBucketSize()) + getBucketSize());
    }

    /**
     * Maintain the "next gather offsets" for newly allocated buckets.
     */
    private void setNextGatherOffsets(long startingAt) {
        int nextOffset = getBucketSize() - 1;
        for (long bucketRoot = startingAt; bucketRoot < values().size(); bucketRoot += getBucketSize()) {
            setNextGatherOffset(bucketRoot, nextOffset);
        }
    }

    /**
     * Heapify a bucket who's entries are in random order.
     * 
     * This works by validating the heap property on each node, iterating
     * "upwards", pushing any out of order parents "down". Check out the
     * wikipedia
     * entry on binary heaps for more about this.
     * 
     * 
     * While this *looks* like it could easily be {@code O(n * log n)}, it is
     * a fairly well studied algorithm attributed to Floyd. There's
     * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
     * case.
     * 
     * 
     * Hayward, Ryan; McDiarmid, Colin (1991).
     * 
     * Average Case Analysis of Heap Building byRepeated Insertion J. Algorithms.
     * 
D.E. Knuth, ”The Art of Computer Programming, Vol. 3, Sorting and Searching”
     * 
     * @param rootIndex the index the start of the bucket
     */
    private void heapify(long rootIndex) {
        int maxParent = bucketSize / 2 - 1;
        for (int parent = maxParent; parent >= 0; parent--) {
            downHeap(rootIndex, parent);
        }
    }

    /**
     * Correct the heap invariant of a parent and its children. This
     * runs in {@code O(log n)} time.
     * @param rootIndex index of the start of the bucket
     * @param parent Index within the bucket of the parent to check.
     *               For example, 0 is the "root".
     */
    private void downHeap(long rootIndex, int parent) {
        while (true) {
            long parentIndex = rootIndex + parent;
            int worst = parent;
            long worstIndex = parentIndex;
            int leftChild = parent * 2 + 1;
            long leftIndex = rootIndex + leftChild;
            if (leftChild < bucketSize) {
                if (betterThan(worstIndex, leftIndex)) {
                    worst = leftChild;
                    worstIndex = leftIndex;
                }
                int rightChild = leftChild + 1;
                long rightIndex = rootIndex + rightChild;
                if (rightChild < bucketSize && betterThan(worstIndex, rightIndex)) {
                    worst = rightChild;
                    worstIndex = rightIndex;
                }
            }
            if (worst == parent) {
                break;
            }
            swap(worstIndex, parentIndex);
            extra.swap(worstIndex, parentIndex);
            parent = worst;
        }
    }

    @Override
    public final void close() {
        Releasables.close(values(), heapMode);
    }

    /**
     * Performs the actual collection against a {@linkplain LeafReaderContext}.
     */
    public abstract class Leaf implements ScorerAware {
        private final LeafReaderContext ctx;
        private ExtraData.Loader loader = null;

        protected Leaf(LeafReaderContext ctx) {
            this.ctx = ctx;
        }

        /**
         * Collect this doc, returning {@code true} if it is competitive.
         */
        public final void collect(int doc, long bucket) throws IOException {
            if (false == advanceExact(doc)) {
                return;
            }
            long rootIndex = bucket * bucketSize;
            if (inHeapMode(bucket)) {
                if (docBetterThan(rootIndex)) {
                    // TODO a "bottom up" insert would save a couple of comparisons. Worth it?
                    setIndexToDocValue(rootIndex);
                    loader().loadFromDoc(rootIndex, doc);
                    downHeap(rootIndex, 0);
                }
                return;
            }
            // Gathering mode
            long requiredSize = rootIndex + bucketSize;
            if (values().size() < requiredSize) {
                grow(requiredSize);
            }
            int next = getNextGatherOffset(rootIndex);
            assert 0 <= next && next < bucketSize
                : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
            long index = next + rootIndex;
            setIndexToDocValue(index);
            loader().loadFromDoc(index, doc);
            if (next == 0) {
                heapMode.set(bucket);
                heapify(rootIndex);
            } else {
                setNextGatherOffset(rootIndex, next - 1);
            }
            return;
        }

        /**
         * Read the sort value from {@code doc} and return {@code true}
         * if there is a value for that document. Otherwise return
         * {@code false} and the sort will skip that document.
         */
        protected abstract boolean advanceExact(int doc) throws IOException;

        /**
         * Set the value at the index to the value of the document to which
         * we just advanced.
         */
        protected abstract void setIndexToDocValue(long index);

        /**
         * {@code true} if the sort value for the doc is "better" than the
         * entry at {@code index}. "Better" in means is "lower" for
         * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
         */
        protected abstract boolean docBetterThan(long index);

        /**
         * Get the extra data loader, building it if we haven't yet built one for this leaf.
         */
        private ExtraData.Loader loader() throws IOException {
            if (loader == null) {
                loader = extra.loader(ctx);
            }
            return loader;
        }
    }

    /**
     * Superclass for implementations of {@linkplain BucketedSort} for {@code double} keys.
     */
    public abstract static class ForDoubles extends BucketedSort {
        private DoubleArray values;

        public ForDoubles(BigArrays bigArrays, SortOrder sortOrder, DocValueFormat format, int bucketSize, ExtraData extra) {
            super(bigArrays, sortOrder, format, bucketSize, extra);
            boolean success = false;
            try {
                values = bigArrays.newDoubleArray(getBucketSize(), false);
                success = true;
            } finally {
                if (success == false) {
                    close();
                }
            }
            initGatherOffsets();
        }

        @Override
        public boolean needsScores() {
            return false;
        }

        @Override
        protected final BigArray values() {
            return values;
        }

        @Override
        protected final void growValues(long minSize) {
            values = bigArrays.grow(values, minSize);
        }

        @Override
        protected final int getNextGatherOffset(long rootIndex) {
            // This cast is safe because all ints fit accurately into a double.
            return (int) values.get(rootIndex);
        }

        @Override
        protected final void setNextGatherOffset(long rootIndex, int offset) {
            values.set(rootIndex, offset);
        }

        @Override
        protected final SortValue getValue(long index) {
            return SortValue.from(values.get(index));
        }

        @Override
        protected final boolean betterThan(long lhs, long rhs) {
            return getOrder().reverseMul() * Double.compare(values.get(lhs), values.get(rhs)) < 0;
        }

        @Override
        protected final void swap(long lhs, long rhs) {
            double tmp = values.get(lhs);
            values.set(lhs, values.get(rhs));
            values.set(rhs, tmp);
        }

        protected abstract class Leaf extends BucketedSort.Leaf {
            protected Leaf(LeafReaderContext ctx) {
                super(ctx);
            }

            /**
             * Return the value for of this sort for the document to which
             * we just {@link #advanceExact(int) moved}. This should be fast
             * because it is called twice per competitive hit when in heap
             * mode, once for {@link #docBetterThan(long)} and once
             * for {@link #setIndexToDocValue(long)}.
             */
            protected abstract double docValue();

            @Override
            public final void setScorer(Scorable scorer) {}

            @Override
            protected final void setIndexToDocValue(long index) {
                values.set(index, docValue());
            }

            @Override
            protected final boolean docBetterThan(long index) {
                return getOrder().reverseMul() * Double.compare(docValue(), values.get(index)) < 0;
            }
        }
    }

    /**
     * Superclass for implementations of {@linkplain BucketedSort} for {@code float} keys.
     */
    public abstract static class ForFloats extends BucketedSort {
        /**
         * The maximum size of buckets this can store. This is because we
         * store the next offset to write to in a float and floats only have
         * {@code 23} bits of mantissa so they can't accurate store values
         * higher than {@code 2 ^ 24}.
         */
        public static final int MAX_BUCKET_SIZE = (int) Math.pow(2, 24);

        private FloatArray values;

        public ForFloats(BigArrays bigArrays, SortOrder sortOrder, DocValueFormat format, int bucketSize, ExtraData extra) {
            super(bigArrays, sortOrder, format, bucketSize, extra);
            if (bucketSize > MAX_BUCKET_SIZE) {
                close();
                throw new IllegalArgumentException("bucket size must be less than [2^24] but was [" + bucketSize + "]");
            }
            boolean success = false;
            try {
                values = bigArrays.newFloatArray(1, false);
                success = true;
            } finally {
                if (success == false) {
                    close();
                }
            }
            initGatherOffsets();
        }

        @Override
        protected final BigArray values() {
            return values;
        }

        @Override
        protected final void growValues(long minSize) {
            values = bigArrays.grow(values, minSize);
        }

        @Override
        protected final int getNextGatherOffset(long rootIndex) {
            /*
             * This cast will not lose precision because we make sure never
             * to write values here that float can't store precisely.
             */
            return (int) values.get(rootIndex);
        }

        @Override
        protected final void setNextGatherOffset(long rootIndex, int offset) {
            values.set(rootIndex, offset);
        }

        @Override
        protected final SortValue getValue(long index) {
            return SortValue.from(values.get(index));
        }

        @Override
        protected final boolean betterThan(long lhs, long rhs) {
            return getOrder().reverseMul() * Float.compare(values.get(lhs), values.get(rhs)) < 0;
        }

        @Override
        protected final void swap(long lhs, long rhs) {
            float tmp = values.get(lhs);
            values.set(lhs, values.get(rhs));
            values.set(rhs, tmp);
        }

        protected abstract class Leaf extends BucketedSort.Leaf {
            protected Leaf(LeafReaderContext ctx) {
                super(ctx);
            }

            /**
             * Return the value for of this sort for the document to which
             * we just {@link #advanceExact(int) moved}. This should be fast
             * because it is called twice per competitive hit when in heap
             * mode, once for {@link #docBetterThan(long)} and once
             * for {@link #setIndexToDocValue(long)}.
             */
            protected abstract float docValue();

            @Override
            protected final void setIndexToDocValue(long index) {
                values.set(index, docValue());
            }

            @Override
            protected final boolean docBetterThan(long index) {
                return getOrder().reverseMul() * Float.compare(docValue(), values.get(index)) < 0;
            }
        }
    }

    /**
     * Superclass for implementations of {@linkplain BucketedSort} for {@code long} keys.
     */
    public abstract static class ForLongs extends BucketedSort {
        private LongArray values;

        public ForLongs(BigArrays bigArrays, SortOrder sortOrder, DocValueFormat format, int bucketSize, ExtraData extra) {
            super(bigArrays, sortOrder, format, bucketSize, extra);
            boolean success = false;
            try {
                values = bigArrays.newLongArray(1, false);
                success = true;
            } finally {
                if (success == false) {
                    close();
                }
            }
            initGatherOffsets();
        }

        @Override
        public final boolean needsScores() {
            return false;
        }

        @Override
        protected final BigArray values() {
            return values;
        }

        @Override
        protected final void growValues(long minSize) {
            values = bigArrays.grow(values, minSize);
        }

        @Override
        protected final int getNextGatherOffset(long rootIndex) {
            return (int) values.get(rootIndex);
        }

        @Override
        protected final void setNextGatherOffset(long rootIndex, int offset) {
            values.set(rootIndex, offset);
        }

        @Override
        protected final SortValue getValue(long index) {
            return SortValue.from(values.get(index));
        }

        @Override
        protected final boolean betterThan(long lhs, long rhs) {
            return getOrder().reverseMul() * Long.compare(values.get(lhs), values.get(rhs)) < 0;
        }

        @Override
        protected final void swap(long lhs, long rhs) {
            long tmp = values.get(lhs);
            values.set(lhs, values.get(rhs));
            values.set(rhs, tmp);
        }

        protected abstract class Leaf extends BucketedSort.Leaf {
            protected Leaf(LeafReaderContext ctx) {
                super(ctx);
            }

            /**
             * Return the value for of this sort for the document to which
             * we just {@link #advanceExact(int) moved}. This should be fast
             * because it is called twice per competitive hit when in heap
             * mode, once for {@link #docBetterThan(long)} and once
             * for {@link #setIndexToDocValue(long)}.
             */
            protected abstract long docValue();

            @Override
            public final void setScorer(Scorable scorer) {}

            @Override
            protected final void setIndexToDocValue(long index) {
                values.set(index, docValue());
            }

            @Override
            protected final boolean docBetterThan(long index) {
                return getOrder().reverseMul() * Long.compare(docValue(), values.get(index)) < 0;
            }
        }
    }
}