All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds Maven / Gradle / Ivy

/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.search.aggregations.bucket.terms;

import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.LongHash;
import org.elasticsearch.common.util.LongLongHash;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.search.aggregations.CardinalityUpperBound;

import java.util.Iterator;
import java.util.Locale;
import java.util.NoSuchElementException;
import java.util.TreeSet;

/**
 * Maps owning bucket ordinals and long bucket keys to bucket ordinals.
 */
public abstract class LongKeyedBucketOrds implements Releasable {
    /**
     * Build a {@link LongKeyedBucketOrds} who's values have unknown bounds.
     *
     * @param cardinality - This should come from the owning aggregation, and is used as an upper bound on the
     *                    owning bucket ordinals.
     */
    public static LongKeyedBucketOrds build(BigArrays bigArrays, CardinalityUpperBound cardinality) {
        return cardinality.map(estimate -> estimate < 2 ? new FromSingle(bigArrays) : new FromMany(bigArrays));
    }

    /**
     * Build a {@link LongKeyedBucketOrds} who's values have known bounds.
     *
     * @param cardinality - This should come from the owning aggregation, and is used as an upper bound on the
     *                    owning bucket ordinals.
     * @param  min - The minimum key value for this aggregation
     * @param max - The maximum key value for this aggregation
     */
    public static LongKeyedBucketOrds buildForValueRange(BigArrays bigArrays, CardinalityUpperBound cardinality, long min, long max) {
        return cardinality.map((int cardinalityUpperBound) -> {
            if (cardinalityUpperBound < 2) {
                return new FromSingle(bigArrays);
            }
            if (min < 0 || cardinalityUpperBound == Integer.MAX_VALUE) {
                // cardinalityUpperBound tops out at maxint. If you see maxInt it could be anything above maxint.
                return new FromMany(bigArrays);
            }
            int owningBucketOrdShift = Long.numberOfLeadingZeros(cardinalityUpperBound);
            int maxBits = 64 - Long.numberOfLeadingZeros(max);
            if (maxBits < owningBucketOrdShift) {
                // There is enough space in a long to contain both the owning bucket and the entire range of values
                return new FromManySmall(bigArrays, owningBucketOrdShift);
            }
            return new FromMany(bigArrays);
        });
    }

    private TreeSet keySet = null;

    private LongKeyedBucketOrds() {}

    /**
     * Add the {@code owningBucketOrd, value} pair. Return the ord for
     * their bucket if they have yet to be added, or {@code -1-ord}
     * if they were already present.
     */
    public abstract long add(long owningBucketOrd, long value);

    /**
     * Count the buckets in {@code owningBucketOrd}.
     * 

* Some aggregations expect this to be fast but most wouldn't * mind particularly if it weren't. */ public abstract long bucketsInOrd(long owningBucketOrd); /** * Find the {@code owningBucketOrd, value} pair. Return the ord for * their bucket if they have been added or {@code -1} if they haven't. */ public abstract long find(long owningBucketOrd, long value); /** * Returns the value currently associated with the bucket ordinal. */ public abstract long get(long ordinal); /** * The number of collected buckets. */ public abstract long size(); /** * The maximum possible used {@code owningBucketOrd}. */ public abstract long maxOwningBucketOrd(); /** * Description used in profile results. */ public abstract String decribe(); /** * Build an iterator for buckets inside {@code owningBucketOrd} in order * of increasing ord. *

* When this is first returns it is "unpositioned" and you must call * {@link BucketOrdsEnum#next()} to move it to the first value. */ public abstract BucketOrdsEnum ordsEnum(long owningBucketOrd); /** * Return an iterator for all keys in the given owning bucket, ordered in natural sort order. * This is suitable for aligning buckets across different instances of an aggregation. * * @param owningBucketOrd Only return keys that occured under this owning bucket * @return a sorted iterator of long key values */ public Iterator keyOrderedIterator(long owningBucketOrd) { if (keySet == null) { // TreeSet's contract includes a naturally ordered iterator keySet = new TreeSet<>(); for (long ord = 0; ord < size(); ord++) { keySet.add(this.get(ord)); } } Iterator toReturn = new Iterator<>() { final Iterator wrapped = keySet.iterator(); final long filterOrd = owningBucketOrd; long next; boolean hasNext = true; @Override public boolean hasNext() { return hasNext; } @Override public Long next() { if (hasNext == false) { throw new NoSuchElementException(); } long toReturn = next; hasNext = false; while (wrapped.hasNext()) { long candidate = wrapped.next(); if (find(filterOrd, candidate) != -1) { next = candidate; hasNext = true; break; } } return toReturn; } }; toReturn.next(); // Prime the first actual value return toReturn; } public void close() { keySet = null; } /** * An iterator for buckets inside a particular {@code owningBucketOrd}. */ public interface BucketOrdsEnum { /** * Advance to the next value. * @return {@code true} if there *is* a next value, * {@code false} if there isn't */ boolean next(); /** * The ordinal of the current value. */ long ord(); /** * The current value. */ long value(); /** * An {@linkplain BucketOrdsEnum} that is empty. */ BucketOrdsEnum EMPTY = new BucketOrdsEnum() { @Override public boolean next() { return false; } @Override public long ord() { return 0; } @Override public long value() { return 0; } }; } /** * Implementation that only works if it is collecting from a single bucket. */ public static class FromSingle extends LongKeyedBucketOrds { private final LongHash ords; public FromSingle(BigArrays bigArrays) { ords = new LongHash(1, bigArrays); } @Override public long add(long owningBucketOrd, long value) { // This is in the critical path for collecting most aggs. Be careful of performance. assert owningBucketOrd == 0; return ords.add(value); } @Override public long find(long owningBucketOrd, long value) { assert owningBucketOrd == 0; return ords.find(value); } @Override public long get(long ordinal) { return ords.get(ordinal); } @Override public long bucketsInOrd(long owningBucketOrd) { assert owningBucketOrd == 0; return ords.size(); } @Override public long size() { return ords.size(); } @Override public long maxOwningBucketOrd() { return 0; } @Override public String decribe() { return "single bucket ords"; } @Override public BucketOrdsEnum ordsEnum(long owningBucketOrd) { assert owningBucketOrd == 0; return new BucketOrdsEnum() { private long ord = -1; private long value; @Override public boolean next() { ord++; if (ord >= ords.size()) { return false; } value = ords.get(ord); return true; } @Override public long value() { return value; } @Override public long ord() { return ord; } }; } @Override public void close() { super.close(); ords.close(); } } /** * Implementation that works properly when collecting from many buckets. */ public static class FromMany extends LongKeyedBucketOrds { private final LongLongHash ords; public FromMany(BigArrays bigArrays) { ords = new LongLongHash(2, bigArrays); } @Override public long add(long owningBucketOrd, long value) { // This is in the critical path for collecting most aggs. Be careful of performance. return ords.add(owningBucketOrd, value); } @Override public long find(long owningBucketOrd, long value) { return ords.find(owningBucketOrd, value); } @Override public long get(long ordinal) { return ords.getKey2(ordinal); } @Override public long bucketsInOrd(long owningBucketOrd) { // TODO it'd be faster to count the number of buckets in a list of these ords rather than one at a time long count = 0; for (long i = 0; i < ords.size(); i++) { if (ords.getKey1(i) == owningBucketOrd) { count++; } } return count; } @Override public long size() { return ords.size(); } @Override public long maxOwningBucketOrd() { // TODO this is fairly expensive to compute. Can we avoid needing it? long max = -1; for (long i = 0; i < ords.size(); i++) { max = Math.max(max, ords.getKey1(i)); } return max; } @Override public String decribe() { return "many bucket ords"; } @Override public BucketOrdsEnum ordsEnum(long owningBucketOrd) { // TODO it'd be faster to iterate many ords at once rather than one at a time return new BucketOrdsEnum() { private long ord = -1; private long value; @Override public boolean next() { while (true) { ord++; if (ord >= ords.size()) { return false; } if (ords.getKey1(ord) == owningBucketOrd) { value = ords.getKey2(ord); return true; } } } @Override public long value() { return value; } @Override public long ord() { return ord; } }; } @Override public void close() { super.close(); ords.close(); } } /** * Implementation that packs the {@code owningbucketOrd} into the top * bits of a {@code long} and uses the bottom bits for the value. */ public static class FromManySmall extends LongKeyedBucketOrds { private final LongHash ords; private final int owningBucketOrdShift; private final long owningBucketOrdMask; public FromManySmall(BigArrays bigArrays, int owningBucketOrdShift) { ords = new LongHash(2, bigArrays); this.owningBucketOrdShift = owningBucketOrdShift; this.owningBucketOrdMask = -1L << owningBucketOrdShift; } private long encode(long owningBucketOrd, long value) { // This is in the critical path for collecting some aggs. Be careful of performance. return (owningBucketOrd << owningBucketOrdShift) | value; } @Override public long add(long owningBucketOrd, long value) { // This is in the critical path for collecting lots of aggs. Be careful of performance. long enc = encode(owningBucketOrd, value); if (owningBucketOrd != (enc >>> owningBucketOrdShift) && (enc & ~owningBucketOrdMask) != value) { throw new IllegalArgumentException( String.format( Locale.ROOT, "[%s] and [%s] must fit in [%s..%s] bits", owningBucketOrd, value, 64 - owningBucketOrdShift, owningBucketOrdShift ) ); } return ords.add(enc); } @Override public long find(long owningBucketOrd, long value) { if (Long.numberOfLeadingZeros(owningBucketOrd) < owningBucketOrdShift) { return -1; } if ((value & owningBucketOrdMask) != 0) { return -1; } return ords.find(encode(owningBucketOrd, value)); } @Override public long get(long ordinal) { return ords.get(ordinal) & ~owningBucketOrdMask; } @Override public long bucketsInOrd(long owningBucketOrd) { // TODO it'd be faster to count the number of buckets in a list of these ords rather than one at a time if (Long.numberOfLeadingZeros(owningBucketOrd) < owningBucketOrdShift) { return 0; } long count = 0; long enc = owningBucketOrd << owningBucketOrdShift; for (long i = 0; i < ords.size(); i++) { if ((ords.get(i) & owningBucketOrdMask) == enc) { count++; } } return count; } @Override public long size() { return ords.size(); } @Override public long maxOwningBucketOrd() { // TODO this is fairly expensive to compute. Can we avoid needing it? long max = -1; for (long i = 0; i < ords.size(); i++) { max = Math.max(max, (ords.get(i) & owningBucketOrdMask) >>> owningBucketOrdShift); } return max; } @Override public String decribe() { return "many bucket ords packed using [" + (64 - owningBucketOrdShift) + "/" + owningBucketOrdShift + "] bits"; } @Override public BucketOrdsEnum ordsEnum(long owningBucketOrd) { // TODO it'd be faster to iterate many ords at once rather than one at a time if (Long.numberOfLeadingZeros(owningBucketOrd) < owningBucketOrdShift) { return BucketOrdsEnum.EMPTY; } final long encodedOwningBucketOrd = owningBucketOrd << owningBucketOrdShift; return new BucketOrdsEnum() { private long ord = -1; private long value; @Override public boolean next() { while (true) { ord++; if (ord >= ords.size()) { return false; } long encoded = ords.get(ord); if ((encoded & owningBucketOrdMask) == encodedOwningBucketOrd) { value = encoded & ~owningBucketOrdMask; return true; } } } @Override public long value() { return value; } @Override public long ord() { return ord; } }; } @Override public void close() { super.close(); ords.close(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy