/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.search.aggregations.bucket.histogram;

import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.rounding.Rounding;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.AggregationStreams;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
import org.elasticsearch.search.aggregations.bucket.BucketStreamContext;
import org.elasticsearch.search.aggregations.bucket.BucketStreams;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.format.ValueFormatter;
import org.elasticsearch.search.aggregations.support.format.ValueFormatterStreams;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;

/**
 * TODO should be renamed to InternalNumericHistogram (see comment on {@link Histogram})?
 */
public class InternalHistogram<B extends InternalHistogram.Bucket> extends InternalMultiBucketAggregation<InternalHistogram<B>, B>
        implements Histogram {

    final static Type TYPE = new Type("histogram", "histo");

    private final static AggregationStreams.Stream STREAM = new AggregationStreams.Stream() {
        @Override
        public InternalHistogram readResult(StreamInput in) throws IOException {
            InternalHistogram histogram = new InternalHistogram();
            histogram.readFrom(in);
            return histogram;
        }
    };

    private final static BucketStreams.Stream<Bucket> BUCKET_STREAM = new BucketStreams.Stream<Bucket>() {
        @Override
        public Bucket readResult(StreamInput in, BucketStreamContext context) throws IOException {
            Factory<?> factory = (Factory<?>) context.attributes().get("factory");
            if (factory == null) {
                throw new IllegalStateException("No factory found for histogram buckets");
            }
            Bucket histogram = new Bucket(context.keyed(), context.formatter(), factory);
            histogram.readFrom(in);
            return histogram;
        }

        @Override
        public BucketStreamContext getBucketStreamContext(Bucket bucket) {
            BucketStreamContext context = new BucketStreamContext();
            context.formatter(bucket.formatter);
            context.keyed(bucket.keyed);
            return context;
        }
    };

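    /**
     * Hooks the two streams above into the global registries so that serialized
     * histograms and their buckets can be read back by type name on the receiving
     * node. Presumably called exactly once at startup by the module that registers
     * all aggregation streams; the call site is not part of this file, so that is
     * an assumption.
     */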
    public static void registerStream() {
        AggregationStreams.registerStream(STREAM, TYPE.stream());
        BucketStreams.registerStream(BUCKET_STREAM, TYPE.stream());
    }

    public static class Bucket extends InternalMultiBucketAggregation.InternalBucket implements Histogram.Bucket {

        long key;
        long docCount;
        InternalAggregations aggregations;
        private transient final boolean keyed;
        protected transient final ValueFormatter formatter;
        private Factory<?> factory;

        public Bucket(boolean keyed, ValueFormatter formatter, Factory<?> factory) {
            this.formatter = formatter;
            this.keyed = keyed;
            this.factory = factory;
        }

        public Bucket(long key, long docCount, boolean keyed, ValueFormatter formatter, Factory<?> factory,
                InternalAggregations aggregations) {
            this(keyed, formatter, factory);
            this.key = key;
            this.docCount = docCount;
            this.aggregations = aggregations;
        }

        protected Factory<?> getFactory() {
            return factory;
        }

        @Override
        public String getKeyAsString() {
            return formatter != null ? formatter.format(key) : ValueFormatter.RAW.format(key);
        }

        @Override
        public Object getKey() {
            return key;
        }

        @Override
        public long getDocCount() {
            return docCount;
        }

        @Override
        public Aggregations getAggregations() {
            return aggregations;
        }

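        /**
         * Merges buckets that have the same key but come from different shards:
         * doc counts are summed and the buckets' sub-aggregations are reduced
         * into one. The receiver's key, keyed flag and formatter are reused,
         * since every bucket in the list represents the same key.
         */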
        <B extends Bucket> B reduce(List<B> buckets, ReduceContext context) {
            List<InternalAggregations> aggregations = new ArrayList<>(buckets.size());
            long docCount = 0;
            for (Bucket bucket : buckets) {
                docCount += bucket.docCount;
                aggregations.add((InternalAggregations) bucket.getAggregations());
            }
            InternalAggregations aggs = InternalAggregations.reduce(aggregations, context);
            return (B) getFactory().createBucket(key, docCount, aggs, keyed, formatter);
        }

        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            if (formatter != ValueFormatter.RAW) {
                Text keyTxt = new Text(formatter.format(key));
                if (keyed) {
                    builder.startObject(keyTxt.string());
                } else {
                    builder.startObject();
                }
                builder.field(CommonFields.KEY_AS_STRING, keyTxt);
            } else {
                if (keyed) {
                    builder.startObject(String.valueOf(getKey()));
                } else {
                    builder.startObject();
                }
            }
            builder.field(CommonFields.KEY, key);
            builder.field(CommonFields.DOC_COUNT, docCount);
            aggregations.toXContentInternal(builder, params);
            builder.endObject();
            return builder;
        }

        @Override
        public void readFrom(StreamInput in) throws IOException {
            key = in.readLong();
            docCount = in.readVLong();
            aggregations = InternalAggregations.readAggregations(in);
        }

        @Override
        public void writeTo(StreamOutput out) throws IOException {
            out.writeLong(key);
            out.writeVLong(docCount);
            aggregations.writeTo(out);
        }

        public ValueFormatter getFormatter() {
            return formatter;
        }

        public boolean getKeyed() {
            return keyed;
        }
    }

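    /**
     * Carries the state needed to materialize empty buckets at reduce time when
     * min_doc_count is 0: the rounding used to step from one bucket key to the
     * next, a prototype of the (empty) sub-aggregations, and the optional
     * extended bounds requested by the user. It is only serialized when
     * minDocCount == 0 (see doReadFrom/doWriteTo below).
     */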
    static class EmptyBucketInfo {

        final Rounding rounding;
        final InternalAggregations subAggregations;
        final ExtendedBounds bounds;

        EmptyBucketInfo(Rounding rounding, InternalAggregations subAggregations) {
            this(rounding, subAggregations, null);
        }

        EmptyBucketInfo(Rounding rounding, InternalAggregations subAggregations, ExtendedBounds bounds) {
            this.rounding = rounding;
            this.subAggregations = subAggregations;
            this.bounds = bounds;
        }

        public static EmptyBucketInfo readFrom(StreamInput in) throws IOException {
            Rounding rounding = Rounding.Streams.read(in);
            InternalAggregations aggs = InternalAggregations.readAggregations(in);
            if (in.readBoolean()) {
                return new EmptyBucketInfo(rounding, aggs, ExtendedBounds.readFrom(in));
            }
            return new EmptyBucketInfo(rounding, aggs);
        }

        public static void writeTo(EmptyBucketInfo info, StreamOutput out) throws IOException {
            Rounding.Streams.write(info.rounding, out);
            info.subAggregations.writeTo(out);
            out.writeBoolean(info.bounds != null);
            if (info.bounds != null) {
                info.bounds.writeTo(out);
            }
        }

    }

    public static class Factory<B extends InternalHistogram.Bucket> {

        protected Factory() {
        }

        public String type() {
            return TYPE.name();
        }

        public InternalHistogram<B> create(String name, List<B> buckets, InternalOrder order, long minDocCount,
                EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed,
                List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
            return new InternalHistogram<>(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed, this, pipelineAggregators,
                    metaData);
        }

        public InternalHistogram<B> create(List<B> buckets, InternalHistogram<B> prototype) {
            return new InternalHistogram<>(prototype.name, buckets, prototype.order, prototype.minDocCount, prototype.emptyBucketInfo,
                    prototype.formatter, prototype.keyed, this, prototype.pipelineAggregators(), prototype.metaData);
        }

        public B createBucket(InternalAggregations aggregations, B prototype) {
            return (B) new Bucket(prototype.key, prototype.docCount, prototype.getKeyed(), prototype.formatter, this, aggregations);
        }

        public B createBucket(Object key, long docCount, InternalAggregations aggregations, boolean keyed, ValueFormatter formatter) {
            if (key instanceof Number) {
                return (B) new Bucket(((Number) key).longValue(), docCount, keyed, formatter, this, aggregations);
            } else {
                throw new AggregationExecutionException("Expected key of type Number but got [" + key + "]");
            }
        }

        protected B createEmptyBucket(boolean keyed, ValueFormatter formatter) {
            return (B) new Bucket(keyed, formatter, this);
        }

    }

    protected List<B> buckets;
    private InternalOrder order;
    private ValueFormatter formatter;
    private boolean keyed;
    private long minDocCount;
    private EmptyBucketInfo emptyBucketInfo;
    protected Factory<B> factory;

    InternalHistogram() {} // for serialization

    InternalHistogram(String name, List<B> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo,
            ValueFormatter formatter, boolean keyed, Factory<B> factory, List<PipelineAggregator> pipelineAggregators,
            Map<String, Object> metaData) {
        super(name, pipelineAggregators, metaData);
        this.buckets = buckets;
        this.order = order;
        assert (minDocCount == 0) == (emptyBucketInfo != null);
        this.minDocCount = minDocCount;
        this.emptyBucketInfo = emptyBucketInfo;
        this.formatter = formatter;
        this.keyed = keyed;
        this.factory = factory;
    }

    @Override
    public Type type() {
        return TYPE;
    }

    @Override
    public List<B> getBuckets() {
        return buckets;
    }

    public Factory<B> getFactory() {
        return factory;
    }

    public Rounding getRounding() {
        return emptyBucketInfo.rounding;
    }

    @Override
    public InternalHistogram<B> create(List<B> buckets) {
        return getFactory().create(buckets, this);
    }

    @Override
    public B createBucket(InternalAggregations aggregations, B prototype) {
        return getFactory().createBucket(aggregations, prototype);
    }

    private static class IteratorAndCurrent<B> {

        private final Iterator<B> iterator;
        private B current;

        IteratorAndCurrent(Iterator<B> iterator) {
            this.iterator = iterator;
            current = iterator.next();
        }

    }

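    /**
     * Merges the per-shard bucket lists with a classic k-way merge: a min-heap
     * ordered by bucket key holds one cursor per shard response. Because each
     * shard returns its buckets sorted by key (asserted below), buckets with
     * equal keys surface consecutively and can be reduced together; reduced
     * buckets whose doc count falls below minDocCount are dropped.
     */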
    private List<B> reduceBuckets(List<InternalAggregation> aggregations, ReduceContext reduceContext) {

        final PriorityQueue<IteratorAndCurrent<B>> pq = new PriorityQueue<IteratorAndCurrent<B>>(aggregations.size()) {
            @Override
            protected boolean lessThan(IteratorAndCurrent<B> a, IteratorAndCurrent<B> b) {
                return a.current.key < b.current.key;
            }
        };
        for (InternalAggregation aggregation : aggregations) {
            InternalHistogram<B> histogram = (InternalHistogram<B>) aggregation;
            if (histogram.buckets.isEmpty() == false) {
                pq.add(new IteratorAndCurrent<>(histogram.buckets.iterator()));
            }
        }

        List<B> reducedBuckets = new ArrayList<>();
        if (pq.size() > 0) {
            // list of buckets coming from different shards that have the same key
            List<B> currentBuckets = new ArrayList<>();
            long key = pq.top().current.key;

            do {
                final IteratorAndCurrent<B> top = pq.top();

                if (top.current.key != key) {
                    // the key changes, reduce what we already buffered and reset the buffer for current buckets
                    final B reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext);
                    if (reduced.getDocCount() >= minDocCount) {
                        reducedBuckets.add(reduced);
                    }
                    currentBuckets.clear();
                    key = top.current.key;
                }

                currentBuckets.add(top.current);

                if (top.iterator.hasNext()) {
                    final B next = top.iterator.next();
                    assert next.key > top.current.key : "shards must return data sorted by key";
                    top.current = next;
                    pq.updateTop();
                } else {
                    pq.pop();
                }
            } while (pq.size() > 0);

            if (currentBuckets.isEmpty() == false) {
                final B reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext);
                if (reduced.getDocCount() >= minDocCount) {
                    reducedBuckets.add(reduced);
                }
            }
        }

        return reducedBuckets;
    }

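    /**
     * Inserts zero-doc-count buckets in three passes: from extended_bounds.min up
     * to the first data bucket, in the gaps between consecutive data buckets, and
     * from the last data bucket up to extended_bounds.max. Keys are advanced with
     * rounding.nextRoundingValue, and all empty buckets share a single reduced
     * copy of the empty sub-aggregations.
     */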
    private void addEmptyBuckets(List<B> list, ReduceContext reduceContext) {
        B lastBucket = null;
        ExtendedBounds bounds = emptyBucketInfo.bounds;
        ListIterator<B> iter = list.listIterator();

        // first adding all the empty buckets *before* the actual data (based on the extended_bounds.min the user requested)
        InternalAggregations reducedEmptySubAggs = InternalAggregations.reduce(Collections.singletonList(emptyBucketInfo.subAggregations),
                reduceContext);
        if (bounds != null) {
            B firstBucket = iter.hasNext() ? list.get(iter.nextIndex()) : null;
            if (firstBucket == null) {
                if (bounds.min != null && bounds.max != null) {
                    long key = bounds.min;
                    long max = bounds.max;
                    while (key <= max) {
                        iter.add(getFactory().createBucket(key, 0,
                                reducedEmptySubAggs,
                                keyed, formatter));
                        key = emptyBucketInfo.rounding.nextRoundingValue(key);
                    }
                }
            } else {
                if (bounds.min != null) {
                    long key = bounds.min;
                    if (key < firstBucket.key) {
                        while (key < firstBucket.key) {
                            iter.add(getFactory().createBucket(key, 0,
                                    reducedEmptySubAggs,
                                    keyed, formatter));
                            key = emptyBucketInfo.rounding.nextRoundingValue(key);
                        }
                    }
                }
            }
        }

        // now adding the empty buckets within the actual data,
        // e.g. if the data series is [1,2,3,7] then 3 empty buckets will be created for 4, 5 and 6
        while (iter.hasNext()) {
            B nextBucket = list.get(iter.nextIndex());
            if (lastBucket != null) {
                long key = emptyBucketInfo.rounding.nextRoundingValue(lastBucket.key);
                while (key < nextBucket.key) {
                    iter.add(getFactory().createBucket(key, 0,
                            reducedEmptySubAggs, keyed,
                            formatter));
                    key = emptyBucketInfo.rounding.nextRoundingValue(key);
                }
                assert key == nextBucket.key;
            }
            lastBucket = iter.next();
        }

        // finally, adding the empty buckets *after* the actual data (based on the extended_bounds.max requested by the user)
        if (bounds != null && lastBucket != null && bounds.max != null && bounds.max > lastBucket.key) {
            long key = emptyBucketInfo.rounding.nextRoundingValue(lastBucket.key);
            long max = bounds.max;
            while (key <= max) {
                iter.add(getFactory().createBucket(key, 0,
                        reducedEmptySubAggs, keyed,
                        formatter));
                key = emptyBucketInfo.rounding.nextRoundingValue(key);
            }
        }
    }

    @Override
    public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
        List<B> reducedBuckets = reduceBuckets(aggregations, reduceContext);

        // adding empty buckets if needed
        if (minDocCount == 0) {
            addEmptyBuckets(reducedBuckets, reduceContext);
        }

        if (order == InternalOrder.KEY_ASC) {
            // nothing to do, data are already sorted since shards return
            // sorted buckets and the merge-sort performed by reduceBuckets
            // maintains order
        } else if (order == InternalOrder.KEY_DESC) {
            // we just need to reverse here...
            List<B> reverse = new ArrayList<>(reducedBuckets);
            Collections.reverse(reverse);
            reducedBuckets = reverse;
        } else {
            // sorted by sub-aggregation, need to fall back to a costly n*log(n) sort
            CollectionUtil.introSort(reducedBuckets, order.comparator());
        }

        return getFactory().create(getName(), reducedBuckets, order, minDocCount, emptyBucketInfo, formatter, keyed, pipelineAggregators(),
                getMetaData());
    }

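    // Wire format, symmetric between doReadFrom and doWriteTo: the factory type
    // name, the order, minDocCount, the EmptyBucketInfo (only when minDocCount
    // is 0), an optional formatter, the keyed flag, and finally a vInt bucket
    // count followed by the buckets themselves.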
    @Override
    protected void doReadFrom(StreamInput in) throws IOException {
        this.factory = resolveFactory(in.readString());
        order = InternalOrder.Streams.readOrder(in);
        minDocCount = in.readVLong();
        if (minDocCount == 0) {
            emptyBucketInfo = EmptyBucketInfo.readFrom(in);
        }
        formatter = ValueFormatterStreams.readOptional(in);
        keyed = in.readBoolean();
        int size = in.readVInt();
        List<B> buckets = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            B bucket = getFactory().createEmptyBucket(keyed, formatter);
            bucket.readFrom(in);
            buckets.add(bucket);
        }
        this.buckets = buckets;
    }

    @SuppressWarnings("unchecked")
    private static <B extends InternalHistogram.Bucket> Factory<B> resolveFactory(String factoryType) {
        if (factoryType.equals(InternalDateHistogram.TYPE.name())) {
            return (Factory<B>) new InternalDateHistogram.Factory();
        } else if (factoryType.equals(TYPE.name())) {
            return new Factory<>();
        } else {
            throw new IllegalStateException("Invalid histogram factory type [" + factoryType + "]");
        }
    }

    @Override
    protected void doWriteTo(StreamOutput out) throws IOException {
        out.writeString(factory.type());
        InternalOrder.Streams.writeOrder(order, out);
        out.writeVLong(minDocCount);
        if (minDocCount == 0) {
            EmptyBucketInfo.writeTo(emptyBucketInfo, out);
        }
        ValueFormatterStreams.writeOptional(formatter, out);
        out.writeBoolean(keyed);
        out.writeVInt(buckets.size());
        for (B bucket : buckets) {
            bucket.writeTo(out);
        }
    }

    @Override
    public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
        if (keyed) {
            builder.startObject(CommonFields.BUCKETS);
        } else {
            builder.startArray(CommonFields.BUCKETS);
        }
        for (B bucket : buckets) {
            bucket.toXContent(builder, params);
        }
        if (keyed) {
            builder.endObject();
        } else {
            builder.endArray();
        }
        return builder;
    }

}
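
/*
 * For reference, a sketch of the JSON that doXContentBody renders, assuming the
 * default RAW formatter and two buckets with keys 0 and 10 (hypothetical values,
 * not taken from this file). With keyed == false the buckets form an array:
 *
 *   "buckets": [
 *     { "key": 0,  "doc_count": 2 },
 *     { "key": 10, "doc_count": 1 }
 *   ]
 *
 * With keyed == true they form an object keyed by the bucket key:
 *
 *   "buckets": {
 *     "0":  { "key": 0,  "doc_count": 2 },
 *     "10": { "key": 10, "doc_count": 1 }
 *   }
 *
 * A "key_as_string" field is added to each bucket when a formatter other than
 * ValueFormatter.RAW is in effect (see Bucket#toXContent).
 */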