/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.AggregationReduceContext;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.DelayedBucket;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
import org.elasticsearch.search.aggregations.InternalOrder;
import org.elasticsearch.search.aggregations.TopBucketBuilder;
import org.elasticsearch.search.aggregations.bucket.IteratorAndCurrent;
import org.elasticsearch.search.aggregations.support.SamplingContext;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import static org.elasticsearch.search.aggregations.InternalOrder.isKeyAsc;
import static org.elasticsearch.search.aggregations.InternalOrder.isKeyOrder;
import static org.elasticsearch.search.aggregations.bucket.terms.InternalTerms.DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME;
import static org.elasticsearch.search.aggregations.bucket.terms.InternalTerms.SUM_OF_OTHER_DOC_COUNTS;
/**
 * Base class for the {@code terms} and {@code multi_terms} aggregations that handles the common reduce logic.
 */
public abstract class AbstractInternalTerms<A extends AbstractInternalTerms<A, B>, B extends AbstractInternalTerms.AbstractTermsBucket>
extends InternalMultiBucketAggregation<A, B> {
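// The self-referential bound (A extends AbstractInternalTerms<A, B>) is the curiously
// recurring template pattern: concrete subclasses such as InternalTerms flow their own
// type through the shared reduce logic, keeping create(...) and createBucket(...) typed.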
public AbstractInternalTerms(String name, Map<String, Object> metadata) {
super(name, metadata);
}
protected AbstractInternalTerms(StreamInput in) throws IOException {
super(in);
}
public abstract static class AbstractTermsBucket extends InternalMultiBucketAggregation.InternalBucket {
protected abstract void updateDocCountError(long docCountErrorDiff);
protected abstract void setDocCountError(long docCountError);
protected abstract boolean getShowDocCountError();
protected abstract long getDocCountError();
}
/**
* Creates InternalTerms at the end of the merge
*/
protected abstract A create(String name, List<B> buckets, BucketOrder reduceOrder, long docCountError, long otherDocCount);
protected abstract int getShardSize();
protected abstract BucketOrder getReduceOrder();
protected abstract BucketOrder getOrder();
protected abstract long getSumOfOtherDocCounts();
protected abstract Long getDocCountError();
protected abstract void setDocCountError(long docCountError);
protected abstract long getMinDocCount();
protected abstract int getRequiredSize();
protected abstract B createBucket(long docCount, InternalAggregations aggs, long docCountError, B prototype);
@Override
public B reduceBucket(List<B> buckets, AggregationReduceContext context) {
assert buckets.size() > 0;
long docCount = 0;
// For the per term doc count error we want the sum of the errors from the
// shards that did not respond with the term. To get it we add up the errors
// from the shards that did respond with the term and subtract that from the
// sum of the errors from all shards.
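// Illustrative numbers (not from the source): if the sum of the errors from all
// shards is 10 and the shards that did respond with the term reported errors of
// 2 and 3, the bucket's error works out to 10 - (2 + 3) = 5. A value of -1 below
// means the error is unknown for at least one shard.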
long docCountError = 0;
List<InternalAggregations> aggregationsList = new ArrayList<>(buckets.size());
for (B bucket : buckets) {
docCount += bucket.getDocCount();
if (docCountError != -1) {
if (bucket.getShowDocCountError() == false || bucket.getDocCountError() == -1) {
docCountError = -1;
} else {
docCountError += bucket.getDocCountError();
}
}
aggregationsList.add((InternalAggregations) bucket.getAggregations());
}
InternalAggregations aggs = InternalAggregations.reduce(aggregationsList, context);
return createBucket(docCount, aggs, docCountError, buckets.get(0));
}
private BucketOrder getReduceOrder(List<InternalAggregation> aggregations) {
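// If every non-empty aggregation was reduced with the same order we can keep that
// order (enabling the merge sort in reduceBuckets); if any disagree, e.g. in a
// mixed-version cluster, fall back to the requested order.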
BucketOrder thisReduceOrder = null;
for (InternalAggregation aggregation : aggregations) {
@SuppressWarnings("unchecked")
A terms = (A) aggregation;
if (terms.getBuckets().size() == 0) {
continue;
}
if (thisReduceOrder == null) {
thisReduceOrder = terms.getReduceOrder();
} else if (thisReduceOrder.equals(terms.getReduceOrder()) == false) {
return getOrder();
}
}
return thisReduceOrder != null ? thisReduceOrder : getOrder();
}
private long getDocCountError(A terms) {
int size = terms.getBuckets().size();
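// A shard that returned fewer buckets than its shard size sent every term it
// had, so nothing was cut off and its error is 0; key-ordered results are
// treated the same way.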
if (size == 0 || size < terms.getShardSize() || isKeyOrder(terms.getOrder())) {
return 0;
} else if (InternalOrder.isCountDesc(terms.getOrder())) {
if (terms.getDocCountError() != null) {
// If there is an existing docCountError for this agg then
// use this as the error for this aggregation
return terms.getDocCountError();
} else {
// otherwise use the doc count of the last term in the
// aggregation
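// (the buckets are sorted by descending doc count in this branch, so the
// minimum doc count below is the last term's)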
return terms.getBuckets().stream().mapToLong(AbstractTermsBucket::getDocCount).min().getAsLong();
}
} else {
return -1;
}
}
/**
* Reduce the buckets of sub-aggregations.
* @param sink handles the reduced buckets; returns false if we should stop iterating the buckets, true if we should continue
* @return the order we used to reduce the buckets
*/
private BucketOrder reduceBuckets(
List<InternalAggregation> aggregations,
AggregationReduceContext reduceContext,
Function<DelayedBucket<B>, Boolean> sink
) {
/*
* Buckets returned by a partial reduce or a shard response are sorted by key since {@link Version#V_7_10_0}.
* That allows us to perform a merge sort when reducing multiple aggregations together.
* For backward compatibility, we disable the merge sort and use {@link #reduceLegacy} if any of
* the provided aggregations uses a different {@link #reduceOrder}.
*/
BucketOrder thisReduceOrder = getReduceOrder(aggregations);
if (isKeyOrder(thisReduceOrder)) {
// extract the primary sort in case this is a compound order.
thisReduceOrder = InternalOrder.key(isKeyAsc(thisReduceOrder));
reduceMergeSort(aggregations, thisReduceOrder, reduceContext, sink);
} else {
reduceLegacy(aggregations, reduceContext, sink);
}
return thisReduceOrder;
}
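// A hedged sketch (caller and variable names are illustrative, and it assumes
// DelayedBucket#reduced() as used in this version) of how a consumer can drive
// reduceBuckets(...): reduce each delayed bucket and stop once enough buckets
// have been collected.
//
// List<B> result = new ArrayList<>();
// reduceBuckets(aggregations, reduceContext, delayed -> {
//     result.add(delayed.reduced());
//     return result.size() < getRequiredSize();
// });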
private void reduceMergeSort(
List<InternalAggregation> aggregations,
BucketOrder thisReduceOrder,
AggregationReduceContext reduceContext,
Function<DelayedBucket<B>, Boolean> sink
) {
assert isKeyOrder(thisReduceOrder);
final Comparator<? super B> cmp = thisReduceOrder.comparator();
final PriorityQueue<IteratorAndCurrent<B>> pq = new PriorityQueue<>(aggregations.size()) {
@Override
protected boolean lessThan(IteratorAndCurrent<B> a, IteratorAndCurrent<B> b) {
return cmp.compare(a.current(), b.current()) < 0;
}
};
for (InternalAggregation aggregation : aggregations) {
@SuppressWarnings("unchecked")
A terms = (A) aggregation;
if (terms.getBuckets().isEmpty() == false) {
pq.add(new IteratorAndCurrent<>(terms.getBuckets().iterator()));
}
}
// list of buckets coming from different shards that have the same key
List<B> sameTermBuckets = new ArrayList<>();
B lastBucket = null;
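// Classic n-way merge: repeatedly pull the smallest current bucket across all
// shard iterators, grouping consecutive buckets that share the same key so the
// group can be reduced together.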
while (pq.size() > 0) {
final IteratorAndCurrent<B> top = pq.top();
assert lastBucket == null || cmp.compare(top.current(), lastBucket) >= 0;
if (lastBucket != null && cmp.compare(top.current(), lastBucket) != 0) {
// the key changed so bundle up the last key's worth of buckets
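// DelayedBucket defers the actual (potentially expensive) reduction until the
// consumer decides the bucket is competitive, so non-competitive buckets are
// never reduced.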
boolean shouldContinue = sink.apply(
new DelayedBucket<>(AbstractInternalTerms.this::reduceBucket, reduceContext, sameTermBuckets)
);
if (false == shouldContinue) {
return;
}
sameTermBuckets = new ArrayList<>();
}
lastBucket = top.current();
sameTermBuckets.add(top.current());
if (top.hasNext()) {
top.next();
/*
* Typically the bucket keys are strictly increasing, but when we merge aggs from two different indices
* we can promote long and unsigned long keys to double, which can cause 2 long keys to be promoted into
* the same double key.
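* For example, the distinct longs 2^53 and 2^53 + 1 both round to the same double value (9007199254740992.0).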
*/
assert cmp.compare(top.current(), lastBucket) >= 0 : "shards must return data sorted by key";
pq.updateTop();
} else {
pq.pop();
}
}
if (sameTermBuckets.isEmpty() == false) {
sink.apply(new DelayedBucket<>(AbstractInternalTerms.this::reduceBucket, reduceContext, sameTermBuckets));
}
}
private void reduceLegacy(
List<InternalAggregation> aggregations,
AggregationReduceContext reduceContext,
Function<DelayedBucket<B>, Boolean> sink
) {
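// Shards that predate the key-sorted responses (see reduceBuckets above) give us
// no ordering guarantee, so group the buckets by key in a hash map before reducing.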
Map<Object, List<B>> bucketMap = new HashMap<>();