org.elasticsearch.search.aggregations.bucket.histogram.InternalHistogram
Elasticsearch subproject :server
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.search.aggregations.bucket.histogram;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationReduceContext;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
import org.elasticsearch.search.aggregations.InternalOrder;
import org.elasticsearch.search.aggregations.KeyComparable;
import org.elasticsearch.search.aggregations.bucket.IteratorAndCurrent;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
import org.elasticsearch.search.aggregations.support.SamplingContext;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Objects;
import java.util.function.DoubleConsumer;
/**
 * Implementation of {@link Histogram}. This is the transport-level ("internal")
 * representation of a numeric histogram aggregation: each shard serializes an
 * instance of this class, and the results are merged in {@link #reduce}.
 */
public final class InternalHistogram extends InternalMultiBucketAggregation<InternalHistogram, InternalHistogram.Bucket>
implements
Histogram,
HistogramFactory {
public static class Bucket extends InternalMultiBucketAggregation.InternalBucket implements Histogram.Bucket, KeyComparable<Bucket> {
final double key;
final long docCount;
final InternalAggregations aggregations;
private final transient boolean keyed;
protected final transient DocValueFormat format;
public Bucket(double key, long docCount, boolean keyed, DocValueFormat format, InternalAggregations aggregations) {
this.format = format;
this.keyed = keyed;
this.key = key;
this.docCount = docCount;
this.aggregations = aggregations;
}
/**
* Read from a stream.
*/
public Bucket(StreamInput in, boolean keyed, DocValueFormat format) throws IOException {
this.format = format;
this.keyed = keyed;
key = in.readDouble();
docCount = in.readVLong();
aggregations = InternalAggregations.readFrom(in);
}
@Override
public boolean equals(Object obj) {
if (obj == null || obj.getClass() != Bucket.class) {
return false;
}
Bucket that = (Bucket) obj;
// No need to take the keyed and format parameters into account,
// they are already stored and tested on the InternalHistogram object
return key == that.key && docCount == that.docCount && Objects.equals(aggregations, that.aggregations);
}
@Override
public int hashCode() {
return Objects.hash(getClass(), key, docCount, aggregations);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeDouble(key);
out.writeVLong(docCount);
aggregations.writeTo(out);
}
@Override
public String getKeyAsString() {
return format.format(key).toString();
}
@Override
public Object getKey() {
return key;
}
@Override
public long getDocCount() {
return docCount;
}
@Override
public Aggregations getAggregations() {
return aggregations;
}
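// Keyed responses render each bucket as a named field of an object (named by
// the formatted key); unkeyed responses render each bucket as an array entry.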
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
String keyAsString = format.format(key).toString();
if (keyed) {
builder.startObject(keyAsString);
} else {
builder.startObject();
}
if (format != DocValueFormat.RAW) {
builder.field(CommonFields.KEY_AS_STRING.getPreferredName(), keyAsString);
}
builder.field(CommonFields.KEY.getPreferredName(), key);
builder.field(CommonFields.DOC_COUNT.getPreferredName(), docCount);
aggregations.toXContentInternal(builder, params);
builder.endObject();
return builder;
}
@Override
public int compareKey(Bucket other) {
return Double.compare(key, other.key);
}
public DocValueFormat getFormatter() {
return format;
}
public boolean getKeyed() {
return keyed;
}
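// For the random_sampler aggregation: scales the sampled doc_count back up
// according to the sampling probability and finalizes sub-aggregations too.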
Bucket finalizeSampling(SamplingContext samplingContext) {
return new Bucket(
key,
samplingContext.scaleUp(docCount),
keyed,
format,
InternalAggregations.finalizeSampling(aggregations, samplingContext)
);
}
}
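/**
 * Everything needed to materialize empty buckets during the final reduce when
 * {@code min_doc_count} is 0: the histogram's interval and offset, the min/max
 * bounds to fill to, and the empty sub-aggregations to copy into each
 * generated bucket.
 */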
public static class EmptyBucketInfo {
final double interval, offset, minBound, maxBound;
final InternalAggregations subAggregations;
public EmptyBucketInfo(double interval, double offset, double minBound, double maxBound, InternalAggregations subAggregations) {
this.interval = interval;
this.offset = offset;
this.minBound = minBound;
this.maxBound = maxBound;
this.subAggregations = subAggregations;
}
EmptyBucketInfo(StreamInput in) throws IOException {
this(in.readDouble(), in.readDouble(), in.readDouble(), in.readDouble(), InternalAggregations.readFrom(in));
}
public void writeTo(StreamOutput out) throws IOException {
out.writeDouble(interval);
out.writeDouble(offset);
out.writeDouble(minBound);
out.writeDouble(maxBound);
subAggregations.writeTo(out);
}
@Override
public boolean equals(Object obj) {
if (obj == null || getClass() != obj.getClass()) {
return false;
}
EmptyBucketInfo that = (EmptyBucketInfo) obj;
return interval == that.interval
&& offset == that.offset
&& minBound == that.minBound
&& maxBound == that.maxBound
&& Objects.equals(subAggregations, that.subAggregations);
}
@Override
public int hashCode() {
return Objects.hash(getClass(), interval, offset, minBound, maxBound, subAggregations);
}
}
private final List<Bucket> buckets;
private final BucketOrder order;
private final DocValueFormat format;
private final boolean keyed;
private final long minDocCount;
final EmptyBucketInfo emptyBucketInfo;
public InternalHistogram(
String name,
List<Bucket> buckets,
BucketOrder order,
long minDocCount,
EmptyBucketInfo emptyBucketInfo,
DocValueFormat formatter,
boolean keyed,
Map<String, Object> metadata
) {
super(name, metadata);
this.buckets = buckets;
this.order = order;
assert (minDocCount == 0) == (emptyBucketInfo != null);
this.minDocCount = minDocCount;
this.emptyBucketInfo = emptyBucketInfo;
this.format = formatter;
this.keyed = keyed;
}
/**
 * Read from a stream.
 */
public InternalHistogram(StreamInput in) throws IOException {
super(in);
order = InternalOrder.Streams.readHistogramOrder(in);
minDocCount = in.readVLong();
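// emptyBucketInfo is present on the wire exactly when minDocCount == 0,
// matching the invariant asserted in the constructor above.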
if (minDocCount == 0) {
emptyBucketInfo = new EmptyBucketInfo(in);
} else {
emptyBucketInfo = null;
}
format = in.readNamedWriteable(DocValueFormat.class);
keyed = in.readBoolean();
buckets = in.readList(stream -> new Bucket(stream, keyed, format));
}
@Override
protected void doWriteTo(StreamOutput out) throws IOException {
InternalOrder.Streams.writeHistogramOrder(order, out);
out.writeVLong(minDocCount);
if (minDocCount == 0) {
emptyBucketInfo.writeTo(out);
}
out.writeNamedWriteable(format);
out.writeBoolean(keyed);
out.writeList(buckets);
}
@Override
public String getWriteableName() {
return HistogramAggregationBuilder.NAME;
}
@Override
public List<Bucket> getBuckets() {
return Collections.unmodifiableList(buckets);
}
long getMinDocCount() {
return minDocCount;
}
BucketOrder getOrder() {
return order;
}
@Override
public InternalHistogram create(List<Bucket> buckets) {
return new InternalHistogram(name, buckets, order, minDocCount, emptyBucketInfo, format, keyed, metadata);
}
@Override
public Bucket createBucket(InternalAggregations aggregations, Bucket prototype) {
return new Bucket(prototype.key, prototype.docCount, prototype.keyed, prototype.format, aggregations);
}
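/**
 * K-way merge of the per-shard bucket lists. Each shard returns buckets sorted
 * by key, so a priority queue ordered by each iterator's current key visits
 * all buckets in ascending key order; runs of equal keys are buffered in
 * currentBuckets and reduced into a single bucket.
 */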
private List<Bucket> reduceBuckets(List<InternalAggregation> aggregations, AggregationReduceContext reduceContext) {
final PriorityQueue<IteratorAndCurrent<Bucket>> pq = new PriorityQueue<>(aggregations.size()) {
@Override
protected boolean lessThan(IteratorAndCurrent<Bucket> a, IteratorAndCurrent<Bucket> b) {
return Double.compare(a.current().key, b.current().key) < 0;
}
};
for (InternalAggregation aggregation : aggregations) {
InternalHistogram histogram = (InternalHistogram) aggregation;
if (histogram.buckets.isEmpty() == false) {
pq.add(new IteratorAndCurrent<>(histogram.buckets.iterator()));
}
}
List<Bucket> reducedBuckets = new ArrayList<>();
if (pq.size() > 0) {
// list of buckets coming from different shards that have the same key
List<Bucket> currentBuckets = new ArrayList<>();
double key = pq.top().current().key;
do {
final IteratorAndCurrent<Bucket> top = pq.top();
if (Double.compare(top.current().key, key) != 0) {
// The key changes, reduce what we already buffered and reset the buffer for current buckets.
// Using Double.compare instead of != to handle NaN correctly.
final Bucket reduced = reduceBucket(currentBuckets, reduceContext);
if (reduced.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
reducedBuckets.add(reduced);
}
currentBuckets.clear();
key = top.current().key;
}
currentBuckets.add(top.current());
if (top.hasNext()) {
top.next();
assert Double.compare(top.current().key, key) > 0 : "shards must return data sorted by key";
pq.updateTop();
} else {
pq.pop();
}
} while (pq.size() > 0);
if (currentBuckets.isEmpty() == false) {
final Bucket reduced = reduceBucket(currentBuckets, reduceContext);
if (reduced.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
reducedBuckets.add(reduced);
}
}
}
return reducedBuckets;
}
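/**
 * Reduces a run of same-key buckets (at most one per shard) into one bucket:
 * doc counts are summed and sub-aggregations are reduced recursively.
 */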
@Override
protected Bucket reduceBucket(List<Bucket> buckets, AggregationReduceContext context) {
assert buckets.size() > 0;
List<InternalAggregations> aggregations = new ArrayList<>(buckets.size());
long docCount = 0;
for (Bucket bucket : buckets) {
docCount += bucket.docCount;
aggregations.add((InternalAggregations) bucket.getAggregations());
}
InternalAggregations aggs = InternalAggregations.reduce(aggregations, context);
return createBucket(buckets.get(0).key, docCount, aggs);
}
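/**
 * Returns the key of the bucket that follows the bucket keyed by {@code key}.
 * Adding an extra half interval before rounding absorbs floating-point error:
 * key + interval could round back down onto {@code key} itself, whereas
 * key + 1.5 * interval always lands strictly inside the next bucket.
 */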
private double nextKey(double key) {
return round(key + emptyBucketInfo.interval + emptyBucketInfo.interval / 2);
}
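/**
 * Maps a value to the key (the inclusive lower bound) of the bucket that
 * contains it. For example, with interval = 10 and offset = 5:
 * round(17) = floor((17 - 5) / 10) * 10 + 5 = 15, i.e. the bucket [15, 25).
 */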
private double round(double key) {
return Math.floor((key - emptyBucketInfo.offset) / emptyBucketInfo.interval) * emptyBucketInfo.interval + emptyBucketInfo.offset;
}
/**
* When we pre-count the empty buckets we report them periodically
* because you can configure the histogram to create more buckets than
* there are atoms in the universe. It'd take a while to count that high
* only to abort. So we report every couple thousand buckets. It'd be
* simpler to report every single bucket we plan to allocate one at a time
* but that'd cause needless overhead on the circuit breakers. Counting a
* couple thousand buckets is plenty fast to fail this quickly in
* pathological cases and plenty large to keep the overhead minimal.
*/
private static final int REPORT_EMPTY_EVERY = 10_000;
private void addEmptyBuckets(List<Bucket> list, AggregationReduceContext reduceContext) {
/*
* Make sure we have space for the empty buckets we're going to add by
* counting all of the empties we plan to add and firing them into
* consumeBucketsAndMaybeBreak.
*/
class Counter implements DoubleConsumer {
private int size = list.size();
@Override
public void accept(double key) {
size++;
if (size >= REPORT_EMPTY_EVERY) {
reduceContext.consumeBucketsAndMaybeBreak(size);
size = 0;
}
}
}
Counter counter = new Counter();
iterateEmptyBuckets(list, list.listIterator(), counter);
reduceContext.consumeBucketsAndMaybeBreak(counter.size);
/*
* Now that we're sure we have space we allocate all the buckets.
*/
InternalAggregations reducedEmptySubAggs = InternalAggregations.reduce(
Collections.singletonList(emptyBucketInfo.subAggregations),
reduceContext
);
ListIterator<Bucket> iter = list.listIterator();
iterateEmptyBuckets(list, iter, key -> iter.add(new Bucket(key, 0, keyed, format, reducedEmptySubAggs)));
}
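/**
 * Walks every gap in {@code list} (before the first bucket, between
 * consecutive buckets, and after the last one, bounded by minBound/maxBound)
 * and feeds each missing key to {@code onBucket}. Called twice by
 * addEmptyBuckets: once to count buckets, then once to insert them.
 */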
private void iterateEmptyBuckets(List<Bucket> list, ListIterator<Bucket> iter, DoubleConsumer onBucket) {
if (iter.hasNext() == false) {
// fill with empty buckets
for (double key = round(emptyBucketInfo.minBound); key <= emptyBucketInfo.maxBound; key = nextKey(key)) {
onBucket.accept(key);
}
return;
}
Bucket first = list.get(iter.nextIndex());
if (Double.isFinite(emptyBucketInfo.minBound)) {
// fill with empty buckets until the first key
for (double key = round(emptyBucketInfo.minBound); key < first.key; key = nextKey(key)) {
onBucket.accept(key);
}
}
// now adding the empty buckets within the actual data,
// e.g. if the data series is [1,2,3,7] there are 3 empty buckets that will be created for 4,5,6
Bucket lastBucket = null;
do {
Bucket nextBucket = list.get(iter.nextIndex());
if (lastBucket != null) {
double key = nextKey(lastBucket.key);
while (key < nextBucket.key) {
onBucket.accept(key);
key = nextKey(key);
}
assert key == nextBucket.key || Double.isNaN(nextBucket.key) : "key: " + key + ", nextBucket.key: " + nextBucket.key;
}
lastBucket = iter.next();
} while (iter.hasNext());
// finally, adding the empty buckets *after* the actual data (based on the extended_bounds.max requested by the user)
for (double key = nextKey(lastBucket.key); key <= emptyBucketInfo.maxBound; key = nextKey(key)) {
onBucket.accept(key);
}
}
@Override
public InternalAggregation reduce(List<InternalAggregation> aggregations, AggregationReduceContext reduceContext) {
List reducedBuckets = reduceBuckets(aggregations, reduceContext);
boolean alreadyAccountedForBuckets = false;
if (reduceContext.isFinalReduce()) {
if (minDocCount == 0) {
addEmptyBuckets(reducedBuckets, reduceContext);
alreadyAccountedForBuckets = true;
}
if (InternalOrder.isKeyDesc(order)) {
// we just need to reverse here...
List<Bucket> reverse = new ArrayList<>(reducedBuckets);
Collections.reverse(reverse);
reducedBuckets = reverse;
} else if (InternalOrder.isKeyAsc(order) == false) {
// nothing to do when sorting by key ascending, as data is already sorted since shards return
// sorted buckets and the merge-sort performed by reduceBuckets maintains order.
// otherwise, sorted by compound order or sub-aggregation, we need to fall back to a costly n*log(n) sort
CollectionUtil.introSort(reducedBuckets, order.comparator());
}
}
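// addEmptyBuckets already pushed every bucket (existing and generated) through
// the circuit breaker, so the buckets only need to be accounted for here otherwise.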
if (false == alreadyAccountedForBuckets) {
reduceContext.consumeBucketsAndMaybeBreak(reducedBuckets.size());
}
return new InternalHistogram(getName(), reducedBuckets, order, minDocCount, emptyBucketInfo, format, keyed, getMetadata());
}
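// Random-sampler support: rebuild the histogram with every bucket's doc_count
// scaled back up from the sampled value to an estimate for the full data set.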
@Override
public InternalAggregation finalizeSampling(SamplingContext samplingContext) {
return new InternalHistogram(
getName(),
buckets.stream().map(b -> b.finalizeSampling(samplingContext)).toList(),
order,
minDocCount,
emptyBucketInfo,
format,
keyed,
getMetadata()
);
}
@Override
public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
if (keyed) {
builder.startObject(CommonFields.BUCKETS.getPreferredName());
} else {
builder.startArray(CommonFields.BUCKETS.getPreferredName());
}
for (Bucket bucket : buckets) {
bucket.toXContent(builder, params);
}
if (keyed) {
builder.endObject();
} else {
builder.endArray();
}
return builder;
}
// HistogramFactory method impls
@Override
public Number getKey(MultiBucketsAggregation.Bucket bucket) {
return ((Bucket) bucket).key;
}
@Override
public Number nextKey(Number key) {
return nextKey(key.doubleValue());
}
@Override
public InternalAggregation createAggregation(List<MultiBucketsAggregation.Bucket> buckets) {
// convert buckets to the right type
List<Bucket> buckets2 = new ArrayList<>(buckets.size());
for (Object b : buckets) {
buckets2.add((Bucket) b);
}
buckets2 = Collections.unmodifiableList(buckets2);
return new InternalHistogram(name, buckets2, order, minDocCount, emptyBucketInfo, format, keyed, getMetadata());
}
@Override
public Bucket createBucket(Number key, long docCount, InternalAggregations aggregations) {
return new Bucket(key.doubleValue(), docCount, keyed, format, aggregations);
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (obj == null || getClass() != obj.getClass()) return false;
if (super.equals(obj) == false) return false;
InternalHistogram that = (InternalHistogram) obj;
return Objects.equals(buckets, that.buckets)
&& Objects.equals(emptyBucketInfo, that.emptyBucketInfo)
&& Objects.equals(format, that.format)
&& Objects.equals(keyed, that.keyed)
&& Objects.equals(minDocCount, that.minDocCount)
&& Objects.equals(order, that.order);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), buckets, emptyBucketInfo, format, keyed, minDocCount, order);
}
}