
org.elasticsearch.search.aggregations.bucket.terms.AbstractInternalTerms Maven / Gradle / Ivy
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.DelayedBucket;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
import org.elasticsearch.search.aggregations.InternalOrder;
import org.elasticsearch.search.aggregations.TopBucketBuilder;
import org.elasticsearch.search.aggregations.bucket.IteratorAndCurrent;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import static org.elasticsearch.search.aggregations.InternalOrder.isKeyAsc;
import static org.elasticsearch.search.aggregations.InternalOrder.isKeyOrder;
import static org.elasticsearch.search.aggregations.bucket.terms.InternalTerms.DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME;
import static org.elasticsearch.search.aggregations.bucket.terms.InternalTerms.SUM_OF_OTHER_DOC_COUNTS;
/**
* Base class for the terms and multi_terms aggregations that handles their common reduce logic.
*/
public abstract class AbstractInternalTerms, B extends AbstractInternalTerms.AbstractTermsBucket>
extends InternalMultiBucketAggregation {
/**
 * Forwards the aggregation name and metadata to the superclass constructor.
 *
 * @param name     the aggregation name
 * @param metadata the aggregation metadata
 */
public AbstractInternalTerms(String name, Map<String, Object> metadata) {
    super(name, metadata);
}
/**
* Stream constructor: hands the input stream to the superclass constructor.
*
* @param in the stream passed to the superclass constructor
* @throws IOException declared on this constructor; it can only originate from the superclass constructor call
*/
protected AbstractInternalTerms(StreamInput in) throws IOException {
super(in);
}
/**
* Bucket base type used by {@link AbstractInternalTerms}. It extends
* {@link InternalMultiBucketAggregation.InternalBucket} with accessors for the
* per-bucket doc count error.
*/
public abstract static class AbstractTermsBucket extends InternalMultiBucketAggregation.InternalBucket {
// NOTE(review): presumably adjusts the stored doc count error by the given difference;
// the exact semantics are defined by the implementing class - confirm there.
protected abstract void updateDocCountError(long docCountErrorDiff);
// NOTE(review): presumably replaces the stored doc count error - confirm in implementations.
protected abstract void setDocCountError(long docCountError);
// When this returns false, reduceBucket reports a merged doc count error of -1.
protected abstract boolean getShowDocCountError();
// reduceBucket sums these values across buckets; a value of -1 forces the merged error to -1.
protected abstract long getDocCountError();
}
/**
 * Creates the aggregation instance at the end of the merge.
 *
 * @param name          the name of the aggregation to create
 * @param buckets       the buckets the new aggregation should hold
 * @param reduceOrder   the order that was used while reducing the buckets
 * @param docCountError the doc count error to report on the new aggregation
 * @param otherDocCount the doc count to report for terms that are not part of {@code buckets}
 * @return the newly created aggregation
 */
protected abstract A create(String name, List<B> buckets, BucketOrder reduceOrder, long docCountError, long otherDocCount);
// Shard size of this aggregation. getDocCountError(A) reports an error of 0 when an
// aggregation holds fewer buckets than this value.
protected abstract int getShardSize();
// Order the buckets of this aggregation are sorted by for reducing; getReduceOrder(List)
// compares this value across the aggregations being reduced.
protected abstract BucketOrder getReduceOrder();
// Requested bucket order; used as the fallback reduce order and to decide how the
// doc count error is derived.
protected abstract BucketOrder getOrder();
// NOTE(review): presumably the doc count of terms that were not returned as buckets - confirm in implementations.
protected abstract long getSumOfOtherDocCounts();
// May return null: getDocCountError(A) falls back to the smallest bucket doc count in that case.
protected abstract Long getDocCountError();
// NOTE(review): presumably stores the aggregation-level doc count error - confirm in implementations.
protected abstract void setDocCountError(long docCountError);
// NOTE(review): presumably the minimum doc count a bucket needs to be returned - confirm in implementations.
protected abstract long getMinDocCount();
// NOTE(review): presumably the number of buckets requested by the user - confirm in implementations.
protected abstract int getRequiredSize();
// Builds a reduced bucket from the merged doc count, sub-aggregations and doc count error.
// reduceBucket passes the first of the merged buckets as the prototype.
protected abstract B createBucket(long docCount, InternalAggregations aggs, long docCountError, B prototype);
/**
 * Merges several buckets into a single bucket.
 * <p>
 * Doc counts are summed and the sub-aggregations are reduced together. The per-bucket doc
 * count errors are summed too, unless any bucket does not show its doc count error or
 * reports {@code -1}; in that case the merged error is {@code -1}.
 *
 * @param buckets the buckets to merge; must not be empty
 * @param context the reduce context used to reduce the sub-aggregations
 * @return the bucket built by {@link #createBucket}, using the first input bucket as prototype
 */
@Override
public B reduceBucket(List<B> buckets, ReduceContext context) {
    assert buckets.size() > 0;
    long docCount = 0;
    // For the per term doc count error we add up the errors from the
    // shards that did not respond with the term. To do this we add up
    // the errors from the shards that did respond with the terms and
    // subtract that from the sum of the error from all shards
    long docCountError = 0;
    List<InternalAggregations> aggregationsList = new ArrayList<>(buckets.size());
    for (B bucket : buckets) {
        docCount += bucket.getDocCount();
        // once the error became unknown (-1) it stays unknown
        if (docCountError != -1) {
            if (bucket.getShowDocCountError() == false || bucket.getDocCountError() == -1) {
                docCountError = -1;
            } else {
                docCountError += bucket.getDocCountError();
            }
        }
        aggregationsList.add((InternalAggregations) bucket.getAggregations());
    }
    InternalAggregations aggs = InternalAggregations.reduce(aggregationsList, context);
    return createBucket(docCount, aggs, docCountError, buckets.get(0));
}
/**
 * Determines the order that can be used to reduce the given aggregations.
 * <p>
 * Aggregations without buckets are ignored. If all remaining aggregations share the same
 * reduce order, that order is returned. If they disagree, or if none of them has buckets,
 * {@link #getOrder()} is returned instead.
 *
 * @param aggregations the aggregations about to be reduced; each element is cast to {@code A}
 * @return the common reduce order, or {@link #getOrder()} as fallback
 */
private BucketOrder getReduceOrder(List<InternalAggregation> aggregations) {
    BucketOrder thisReduceOrder = null;
    for (InternalAggregation aggregation : aggregations) {
        @SuppressWarnings("unchecked")
        A terms = (A) aggregation;
        if (terms.getBuckets().isEmpty()) {
            continue;
        }
        if (thisReduceOrder == null) {
            thisReduceOrder = terms.getReduceOrder();
        } else if (thisReduceOrder.equals(terms.getReduceOrder()) == false) {
            // mixed reduce orders: fall back to the requested order
            return getOrder();
        }
    }
    return thisReduceOrder != null ? thisReduceOrder : getOrder();
}
/**
 * Computes the doc count error contributed by a single aggregation.
 *
 * @param terms the aggregation to inspect
 * @return {@code 0} when the aggregation has no buckets, holds fewer buckets than its shard
 *         size, or is ordered by key; for count-descending order the aggregation's own doc
 *         count error if it has one, otherwise the smallest bucket doc count; {@code -1}
 *         for any other order
 */
private long getDocCountError(A terms) {
    final int bucketCount = terms.getBuckets().size();
    if (bucketCount == 0 || bucketCount < terms.getShardSize() || isKeyOrder(terms.getOrder())) {
        return 0;
    }
    if (InternalOrder.isCountDesc(terms.getOrder()) == false) {
        // no error can be derived for other orders
        return -1;
    }
    final Long existingError = terms.getDocCountError();
    if (existingError != null) {
        // the aggregation already carries a doc count error, so reuse it
        return existingError;
    }
    // otherwise fall back to the smallest doc count among the buckets of this aggregation
    return terms.getBuckets().stream().mapToLong(AbstractTermsBucket::getDocCount).min().getAsLong();
}
/**
* Reduce the buckets of sub-aggregations.
* @param sink Handle the reduced buckets. Returns false if we should stop iterating the buckets, true if we should continue.
* @return the order we used to reduce the buckets
*/
private BucketOrder reduceBuckets(
List aggregations,
InternalAggregation.ReduceContext reduceContext,
Function, Boolean> sink
) {
/*
* Buckets returned by a partial reduce or a shard response are sorted by key since {@link Version#V_7_10_0}.
* That allows to perform a merge sort when reducing multiple aggregations together.
* For backward compatibility, we disable the merge sort and use ({@link #reduceLegacy} if any of
* the provided aggregations use a different {@link #reduceOrder}.
*/
BucketOrder thisReduceOrder = getReduceOrder(aggregations);
if (isKeyOrder(thisReduceOrder)) {
// extract the primary sort in case this is a compound order.
thisReduceOrder = InternalOrder.key(isKeyAsc(thisReduceOrder));
reduceMergeSort(aggregations, thisReduceOrder, reduceContext, sink);
} else {
reduceLegacy(aggregations, reduceContext, sink);
}
return thisReduceOrder;
}
private void reduceMergeSort(
List aggregations,
BucketOrder thisReduceOrder,
InternalAggregation.ReduceContext reduceContext,
Function, Boolean> sink
) {
assert isKeyOrder(thisReduceOrder);
final Comparator cmp = thisReduceOrder.comparator();
final PriorityQueue> pq = new PriorityQueue>(aggregations.size()) {
@Override
protected boolean lessThan(IteratorAndCurrent a, IteratorAndCurrent b) {
return cmp.compare(a.current(), b.current()) < 0;
}
};
for (InternalAggregation aggregation : aggregations) {
@SuppressWarnings("unchecked")
A terms = (A) aggregation;
if (terms.getBuckets().isEmpty() == false) {
pq.add(new IteratorAndCurrent<>(terms.getBuckets().iterator()));
}
}
// list of buckets coming from different shards that have the same key
List sameTermBuckets = new ArrayList<>();
B lastBucket = null;
while (pq.size() > 0) {
final IteratorAndCurrent top = pq.top();
assert lastBucket == null || cmp.compare(top.current(), lastBucket) >= 0;
if (lastBucket != null && cmp.compare(top.current(), lastBucket) != 0) {
// the key changed so bundle up the last key's worth of buckets
boolean shouldContinue = sink.apply(
new DelayedBucket(AbstractInternalTerms.this::reduceBucket, reduceContext, sameTermBuckets)
);
if (false == shouldContinue) {
return;
}
sameTermBuckets = new ArrayList<>();
}
lastBucket = top.current();
sameTermBuckets.add(top.current());
if (top.hasNext()) {
top.next();
/*
* Typically the bucket keys are strictly increasing, but when we merge aggs from two different indices
* we can promote long and unsigned long keys to double, which can cause 2 long keys to be promoted into
* the same double key.
*/
assert cmp.compare(top.current(), lastBucket) >= 0 : "shards must return data sorted by key";
pq.updateTop();
} else {
pq.pop();
}
}
if (sameTermBuckets.isEmpty() == false) {
sink.apply(new DelayedBucket(AbstractInternalTerms.this::reduceBucket, reduceContext, sameTermBuckets));
}
}
private void reduceLegacy(
List aggregations,
InternalAggregation.ReduceContext reduceContext,
Function, Boolean> sink
) {
Map
© 2015 - 2025 Weber Informatics LLC | Privacy Policy