/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.histogram;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.rounding.Rounding;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.AggregationStreams;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
import org.elasticsearch.search.aggregations.bucket.BucketStreamContext;
import org.elasticsearch.search.aggregations.bucket.BucketStreams;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.format.ValueFormatter;
import org.elasticsearch.search.aggregations.support.format.ValueFormatterStreams;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
/**
* TODO should be renamed to InternalNumericHistogram (see comment on {@link Histogram})?
*/
public class InternalHistogram<B extends InternalHistogram.Bucket> extends InternalMultiBucketAggregation<InternalHistogram, B> implements
Histogram {
final static Type TYPE = new Type("histogram", "histo");
private final static AggregationStreams.Stream STREAM = new AggregationStreams.Stream() {
@Override
public InternalHistogram readResult(StreamInput in) throws IOException {
InternalHistogram histogram = new InternalHistogram();
histogram.readFrom(in);
return histogram;
}
};
private final static BucketStreams.Stream<Bucket> BUCKET_STREAM = new BucketStreams.Stream<Bucket>() {
@Override
public Bucket readResult(StreamInput in, BucketStreamContext context) throws IOException {
Factory<?> factory = (Factory<?>) context.attributes().get("factory");
if (factory == null) {
throw new IllegalStateException("No factory found for histogram buckets");
}
Bucket histogram = new Bucket(context.keyed(), context.formatter(), factory);
histogram.readFrom(in);
return histogram;
}
@Override
public BucketStreamContext getBucketStreamContext(Bucket bucket) {
BucketStreamContext context = new BucketStreamContext();
context.formatter(bucket.formatter);
context.keyed(bucket.keyed);
return context;
}
};
public static void registerStream() {
AggregationStreams.registerStream(STREAM, TYPE.stream());
BucketStreams.registerStream(BUCKET_STREAM, TYPE.stream());
}
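// The two registries above are how aggregation results get wired into the
// transport layer: when a node deserializes a shard response, it resolves the
// reader registered under the type's stream name. Registration is expected to
// happen once at startup (in upstream Elasticsearch this is driven by the
// search/transport modules); a minimal sketch of the call defined above:
//
//     InternalHistogram.registerStream();
//
// After this call, AggregationStreams can deserialize a histogram result and
// BucketStreams can deserialize its buckets.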
public static class Bucket extends InternalMultiBucketAggregation.InternalBucket implements Histogram.Bucket {
long key;
long docCount;
InternalAggregations aggregations;
private transient final boolean keyed;
protected transient final ValueFormatter formatter;
private Factory<?> factory;
public Bucket(boolean keyed, ValueFormatter formatter, Factory<?> factory) {
this.formatter = formatter;
this.keyed = keyed;
this.factory = factory;
}
public Bucket(long key, long docCount, boolean keyed, ValueFormatter formatter, Factory<?> factory,
InternalAggregations aggregations) {
this(keyed, formatter, factory);
this.key = key;
this.docCount = docCount;
this.aggregations = aggregations;
}
protected Factory<?> getFactory() {
return factory;
}
@Override
public String getKeyAsString() {
return formatter != null ? formatter.format(key) : ValueFormatter.RAW.format(key);
}
@Override
public Object getKey() {
return key;
}
@Override
public long getDocCount() {
return docCount;
}
@Override
public Aggregations getAggregations() {
return aggregations;
}
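/**
 * Merges shard-level copies of one bucket (all sharing this bucket's key)
 * into a single bucket: doc counts are summed and sub-aggregations are
 * reduced recursively. For example (hypothetical counts), two shards each
 * returning key 5 with doc counts 3 and 4 reduce to one key-5 bucket with
 * doc count 7.
 */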
<B extends Bucket> B reduce(List<B> buckets, ReduceContext context) {
List<InternalAggregations> aggregations = new ArrayList<>(buckets.size());
long docCount = 0;
for (Bucket bucket : buckets) {
docCount += bucket.docCount;
aggregations.add((InternalAggregations) bucket.getAggregations());
}
InternalAggregations aggs = InternalAggregations.reduce(aggregations, context);
return (B) getFactory().createBucket(key, docCount, aggs, keyed, formatter);
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
if (formatter != ValueFormatter.RAW) {
Text keyTxt = new Text(formatter.format(key));
if (keyed) {
builder.startObject(keyTxt.string());
} else {
builder.startObject();
}
builder.field(CommonFields.KEY_AS_STRING, keyTxt);
} else {
if (keyed) {
builder.startObject(String.valueOf(getKey()));
} else {
builder.startObject();
}
}
builder.field(CommonFields.KEY, key);
builder.field(CommonFields.DOC_COUNT, docCount);
aggregations.toXContentInternal(builder, params);
builder.endObject();
return builder;
}
@Override
public void readFrom(StreamInput in) throws IOException {
key = in.readLong();
docCount = in.readVLong();
aggregations = InternalAggregations.readAggregations(in);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeLong(key);
out.writeVLong(docCount);
aggregations.writeTo(out);
}
public ValueFormatter getFormatter() {
return formatter;
}
public boolean getKeyed() {
return keyed;
}
}
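/**
 * Everything the reduce phase needs to materialize zero-doc-count buckets
 * when min_doc_count == 0: the rounding that steps from one bucket key to
 * the next, a prototype of the empty sub-aggregations, and the optional
 * extended_bounds that force buckets beyond the range of the actual data.
 */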
static class EmptyBucketInfo {
final Rounding rounding;
final InternalAggregations subAggregations;
final ExtendedBounds bounds;
EmptyBucketInfo(Rounding rounding, InternalAggregations subAggregations) {
this(rounding, subAggregations, null);
}
EmptyBucketInfo(Rounding rounding, InternalAggregations subAggregations, ExtendedBounds bounds) {
this.rounding = rounding;
this.subAggregations = subAggregations;
this.bounds = bounds;
}
public static EmptyBucketInfo readFrom(StreamInput in) throws IOException {
Rounding rounding = Rounding.Streams.read(in);
InternalAggregations aggs = InternalAggregations.readAggregations(in);
if (in.readBoolean()) {
return new EmptyBucketInfo(rounding, aggs, ExtendedBounds.readFrom(in));
}
return new EmptyBucketInfo(rounding, aggs);
}
public static void writeTo(EmptyBucketInfo info, StreamOutput out) throws IOException {
Rounding.Streams.write(info.rounding, out);
info.subAggregations.writeTo(out);
out.writeBoolean(info.bounds != null);
if (info.bounds != null) {
info.bounds.writeTo(out);
}
}
}
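/**
 * Creates histogram instances and their buckets. Subclasses (e.g. the date
 * histogram's factory) override creation to produce type-specific buckets,
 * which is why this class always goes through getFactory() rather than
 * instantiating buckets directly.
 */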
public static class Factory<B extends InternalHistogram.Bucket> {
protected Factory() {
}
public String type() {
return TYPE.name();
}
public InternalHistogram<B> create(String name, List<B> buckets, InternalOrder order, long minDocCount,
EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) {
return new InternalHistogram<>(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed, this, pipelineAggregators,
metaData);
}
public InternalHistogram<B> create(List<B> buckets, InternalHistogram<B> prototype) {
return new InternalHistogram<>(prototype.name, buckets, prototype.order, prototype.minDocCount, prototype.emptyBucketInfo,
prototype.formatter, prototype.keyed, this, prototype.pipelineAggregators(), prototype.metaData);
}
public B createBucket(InternalAggregations aggregations, B prototype) {
return (B) new Bucket(prototype.key, prototype.docCount, prototype.getKeyed(), prototype.formatter, this, aggregations);
}
public B createBucket(Object key, long docCount, InternalAggregations aggregations, boolean keyed, ValueFormatter formatter) {
if (key instanceof Number) {
return (B) new Bucket(((Number) key).longValue(), docCount, keyed, formatter, this, aggregations);
} else {
throw new AggregationExecutionException("Expected key of type Number but got [" + key + "]");
}
}
protected B createEmptyBucket(boolean keyed, ValueFormatter formatter) {
return (B) new Bucket(keyed, formatter, this);
}
}
protected List<B> buckets;
private InternalOrder order;
private ValueFormatter formatter;
private boolean keyed;
private long minDocCount;
private EmptyBucketInfo emptyBucketInfo;
protected Factory<B> factory;
InternalHistogram() {} // for serialization
InternalHistogram(String name, List<B> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo,
ValueFormatter formatter, boolean keyed, Factory<B> factory, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) {
super(name, pipelineAggregators, metaData);
this.buckets = buckets;
this.order = order;
assert (minDocCount == 0) == (emptyBucketInfo != null);
this.minDocCount = minDocCount;
this.emptyBucketInfo = emptyBucketInfo;
this.formatter = formatter;
this.keyed = keyed;
this.factory = factory;
}
@Override
public Type type() {
return TYPE;
}
@Override
public List<B> getBuckets() {
return buckets;
}
public Factory<B> getFactory() {
return factory;
}
public Rounding getRounding() {
return emptyBucketInfo.rounding;
}
@Override
public InternalHistogram<B> create(List<B> buckets) {
return getFactory().create(buckets, this);
}
@Override
public B createBucket(InternalAggregations aggregations, B prototype) {
return getFactory().createBucket(aggregations, prototype);
}
private static class IteratorAndCurrent<B extends Bucket> {
private final Iterator<B> iterator;
private B current;
IteratorAndCurrent(Iterator<B> iterator) {
this.iterator = iterator;
current = iterator.next();
}
}
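/**
 * N-way merge of the (already sorted) per-shard bucket lists, driven by a
 * priority queue ordered by each iterator's current key. Buckets sharing a
 * key are buffered in currentBuckets and reduced together; reduced buckets
 * whose doc count falls below min_doc_count are dropped. For example
 * (hypothetical keys), shard lists [1, 3] and [2, 3] merge into the reduced
 * keys [1, 2, 3], with key 3 combining one bucket from each shard.
 */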
private List<B> reduceBuckets(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
final PriorityQueue<IteratorAndCurrent<B>> pq = new PriorityQueue<IteratorAndCurrent<B>>(aggregations.size()) {
@Override
protected boolean lessThan(IteratorAndCurrent<B> a, IteratorAndCurrent<B> b) {
return a.current.key < b.current.key;
}
};
for (InternalAggregation aggregation : aggregations) {
InternalHistogram<B> histogram = (InternalHistogram<B>) aggregation;
if (histogram.buckets.isEmpty() == false) {
pq.add(new IteratorAndCurrent<>(histogram.buckets.iterator()));
}
}
List<B> reducedBuckets = new ArrayList<>();
if (pq.size() > 0) {
// list of buckets coming from different shards that have the same key
List<B> currentBuckets = new ArrayList<>();
long key = pq.top().current.key;
do {
final IteratorAndCurrent<B> top = pq.top();
if (top.current.key != key) {
// the key changes, reduce what we already buffered and reset the buffer for current buckets
final B reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext);
if (reduced.getDocCount() >= minDocCount) {
reducedBuckets.add(reduced);
}
currentBuckets.clear();
key = top.current.key;
}
currentBuckets.add(top.current);
if (top.iterator.hasNext()) {
final B next = top.iterator.next();
assert next.key > top.current.key : "shards must return data sorted by key";
top.current = next;
pq.updateTop();
} else {
pq.pop();
}
} while (pq.size() > 0);
if (currentBuckets.isEmpty() == false) {
final B reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext);
if (reduced.getDocCount() >= minDocCount) {
reducedBuckets.add(reduced);
}
}
}
return reducedBuckets;
}
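/**
 * Inserts zero-doc-count buckets in three passes: before the first data
 * bucket (down from extended_bounds.min if set), in the gaps between data
 * buckets, and after the last data bucket (up to extended_bounds.max if
 * set). Keys advance via the rounding, so with an interval of 1 the data
 * keys [1, 2, 3, 7] gain the empty buckets 4, 5 and 6.
 */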
private void addEmptyBuckets(List<B> list, ReduceContext reduceContext) {
B lastBucket = null;
ExtendedBounds bounds = emptyBucketInfo.bounds;
ListIterator<B> iter = list.listIterator();
// first adding all the empty buckets *before* the actual data (based on the extended_bounds.min the user requested)
InternalAggregations reducedEmptySubAggs = InternalAggregations.reduce(Collections.singletonList(emptyBucketInfo.subAggregations),
reduceContext);
if (bounds != null) {
B firstBucket = iter.hasNext() ? list.get(iter.nextIndex()) : null;
if (firstBucket == null) {
if (bounds.min != null && bounds.max != null) {
long key = bounds.min;
long max = bounds.max;
while (key <= max) {
iter.add(getFactory().createBucket(key, 0,
reducedEmptySubAggs,
keyed, formatter));
key = emptyBucketInfo.rounding.nextRoundingValue(key);
}
}
} else {
if (bounds.min != null) {
long key = bounds.min;
if (key < firstBucket.key) {
while (key < firstBucket.key) {
iter.add(getFactory().createBucket(key, 0,
reducedEmptySubAggs,
keyed, formatter));
key = emptyBucketInfo.rounding.nextRoundingValue(key);
}
}
}
}
}
// now adding the empty buckets within the actual data,
// e.g. if the data series is [1,2,3,7] there are 3 empty buckets that will be created for 4,5,6
while (iter.hasNext()) {
B nextBucket = list.get(iter.nextIndex());
if (lastBucket != null) {
long key = emptyBucketInfo.rounding.nextRoundingValue(lastBucket.key);
while (key < nextBucket.key) {
iter.add(getFactory().createBucket(key, 0,
reducedEmptySubAggs, keyed,
formatter));
key = emptyBucketInfo.rounding.nextRoundingValue(key);
}
assert key == nextBucket.key;
}
lastBucket = iter.next();
}
// finally, adding the empty buckets *after* the actual data (based on the extended_bounds.max requested by the user)
if (bounds != null && lastBucket != null && bounds.max != null && bounds.max > lastBucket.key) {
long key = emptyBucketInfo.rounding.nextRoundingValue(lastBucket.key);
long max = bounds.max;
while (key <= max) {
iter.add(getFactory().createBucket(key, 0,
reducedEmptySubAggs, keyed,
formatter));
key = emptyBucketInfo.rounding.nextRoundingValue(key);
}
}
}
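/**
 * Coordinating-node reduce: merge the shard buckets, optionally fill in
 * empty buckets (min_doc_count == 0), then fix the ordering. KEY_ASC is
 * free because the merge preserves key order, KEY_DESC is a simple
 * reversal, and ordering by sub-aggregation values needs an explicit sort.
 */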
@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
List reducedBuckets = reduceBuckets(aggregations, reduceContext);
// adding empty buckets if needed
if (minDocCount == 0) {
addEmptyBuckets(reducedBuckets, reduceContext);
}
if (order == InternalOrder.KEY_ASC) {
// nothing to do, data are already sorted since shards return
// sorted buckets and the merge-sort performed by reduceBuckets
// maintains order
} else if (order == InternalOrder.KEY_DESC) {
// we just need to reverse here...
List<B> reverse = new ArrayList<>(reducedBuckets);
Collections.reverse(reverse);
reducedBuckets = reverse;
} else {
// sorted by sub-aggregation, need to fall back to a costly n*log(n) sort
CollectionUtil.introSort(reducedBuckets, order.comparator());
}
return getFactory().create(getName(), reducedBuckets, order, minDocCount, emptyBucketInfo, formatter, keyed, pipelineAggregators(),
getMetaData());
}
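// Wire format (the write side in doWriteTo below mirrors this read order
// exactly): factory type string, order, vlong min_doc_count, the
// EmptyBucketInfo iff min_doc_count == 0, an optional formatter, the keyed
// flag, and a vint bucket count followed by the buckets themselves.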
@Override
protected void doReadFrom(StreamInput in) throws IOException {
this.factory = resolveFactory(in.readString());
order = InternalOrder.Streams.readOrder(in);
minDocCount = in.readVLong();
if (minDocCount == 0) {
emptyBucketInfo = EmptyBucketInfo.readFrom(in);
}
formatter = ValueFormatterStreams.readOptional(in);
keyed = in.readBoolean();
int size = in.readVInt();
List<B> buckets = new ArrayList<>(size);
for (int i = 0; i < size; i++) {
B bucket = getFactory().createEmptyBucket(keyed, formatter);
bucket.readFrom(in);
buckets.add(bucket);
}
this.buckets = buckets;
}
@SuppressWarnings("unchecked")
private static <B extends InternalHistogram.Bucket> Factory<B> resolveFactory(String factoryType) {
if (factoryType.equals(InternalDateHistogram.TYPE.name())) {
return (Factory<B>) new InternalDateHistogram.Factory();
} else if (factoryType.equals(TYPE.name())) {
return new Factory<>();
} else {
throw new IllegalStateException("Invalid histogram factory type [" + factoryType + "]");
}
}
@Override
protected void doWriteTo(StreamOutput out) throws IOException {
out.writeString(factory.type());
InternalOrder.Streams.writeOrder(order, out);
out.writeVLong(minDocCount);
if (minDocCount == 0) {
EmptyBucketInfo.writeTo(emptyBucketInfo, out);
}
ValueFormatterStreams.writeOptional(formatter, out);
out.writeBoolean(keyed);
out.writeVInt(buckets.size());
for (B bucket : buckets) {
bucket.writeTo(out);
}
}
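// Response rendering: "buckets" is a JSON object keyed by each bucket's key
// when keyed == true, and a JSON array otherwise. A sketch of the two shapes
// (field values hypothetical):
//
//     "buckets": { "0": { "key": 0, "doc_count": 2 }, ... }    // keyed
//     "buckets": [ { "key": 0, "doc_count": 2 }, ... ]         // not keyed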
@Override
public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
if (keyed) {
builder.startObject(CommonFields.BUCKETS);
} else {
builder.startArray(CommonFields.BUCKETS);
}
for (B bucket : buckets) {
bucket.toXContent(builder, params);
}
if (keyed) {
builder.endObject();
} else {
builder.endArray();
}
return builder;
}
}