/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.search.aggregations.bucket.terms;
import org.apache.lucene.util.PriorityQueue;
import org.opensearch.LegacyESVersion;
import org.opensearch.core.ParseField;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.AggregationExecutionException;
import org.opensearch.search.aggregations.Aggregations;
import org.opensearch.search.aggregations.BucketOrder;
import org.opensearch.search.aggregations.InternalAggregation;
import org.opensearch.search.aggregations.InternalAggregations;
import org.opensearch.search.aggregations.InternalMultiBucketAggregation;
import org.opensearch.search.aggregations.InternalOrder;
import org.opensearch.search.aggregations.KeyComparable;
import org.opensearch.search.aggregations.bucket.IteratorAndCurrent;
import org.opensearch.search.aggregations.bucket.LocalBucketCountThresholds;
import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Function;
import static org.opensearch.search.aggregations.InternalOrder.isKeyAsc;
import static org.opensearch.search.aggregations.InternalOrder.isKeyOrder;
/**
 * Base implementation of the {@code terms} aggregation result: holds the reduced buckets and
 * merges shard-level results on the coordinating node.
*
* @opensearch.internal
*/
public abstract class InternalTerms<A extends InternalTerms<A, B>, B extends InternalTerms.AbstractInternalBucket> extends
    InternalMultiBucketAggregation<A, B>
    implements
    Terms {
protected static final ParseField DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME = new ParseField("doc_count_error_upper_bound");
protected static final ParseField SUM_OF_OTHER_DOC_COUNTS = new ParseField("sum_other_doc_count");
/**
* Base internal multi bucket
*
* @opensearch.internal
*/
public abstract static class AbstractInternalBucket extends InternalMultiBucketAggregation.InternalBucket implements Terms.Bucket {
abstract void setDocCountError(long docCountError);
        abstract void setDocCountError(Function<Long, Long> updater);
abstract boolean showDocCountError();
}
/**
* Base bucket class
*
* @opensearch.internal
*/
    public abstract static class Bucket<B extends Bucket<B>> extends AbstractInternalBucket implements KeyComparable<B> {
/**
* Reads a bucket. Should be a constructor reference.
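         * Concrete implementations typically pass a constructor reference such as
         * {@code StringTerms.Bucket::new}.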
*
* @opensearch.internal
*/
@FunctionalInterface
        public interface Reader<B extends Bucket<B>> {
B read(StreamInput in, DocValueFormat format, boolean showDocCountError) throws IOException;
}
long bucketOrd;
protected long docCount;
protected long docCountError;
protected InternalAggregations aggregations;
protected final boolean showDocCountError;
protected final DocValueFormat format;
protected Bucket(
long docCount,
InternalAggregations aggregations,
boolean showDocCountError,
long docCountError,
DocValueFormat formatter
) {
this.showDocCountError = showDocCountError;
this.format = formatter;
this.docCount = docCount;
this.aggregations = aggregations;
this.docCountError = docCountError;
}
/**
* Read from a stream.
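         * The wire layout mirrors {@link #writeTo}: the doc count as a vlong, the doc count error as a
         * long (present only when {@code showDocCountError} is set), then the sub-aggregations; the term
         * itself is read by the subclass constructor.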
*/
protected Bucket(StreamInput in, DocValueFormat formatter, boolean showDocCountError) throws IOException {
this.showDocCountError = showDocCountError;
this.format = formatter;
docCount = in.readVLong();
docCountError = -1;
if (showDocCountError) {
docCountError = in.readLong();
}
aggregations = InternalAggregations.readFrom(in);
}
@Override
public final void writeTo(StreamOutput out) throws IOException {
out.writeVLong(getDocCount());
if (showDocCountError) {
out.writeLong(docCountError);
}
aggregations.writeTo(out);
writeTermTo(out);
}
protected abstract void writeTermTo(StreamOutput out) throws IOException;
@Override
public long getDocCount() {
return docCount;
}
@Override
public long getDocCountError() {
if (!showDocCountError) {
throw new IllegalStateException("show_terms_doc_count_error is false");
}
return docCountError;
}
@Override
public void setDocCountError(long docCountError) {
this.docCountError = docCountError;
}
@Override
        public void setDocCountError(Function<Long, Long> updater) {
this.docCountError = updater.apply(this.docCountError);
}
@Override
public boolean showDocCountError() {
return showDocCountError;
}
@Override
public Aggregations getAggregations() {
return aggregations;
}
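        // With showDocCountError set, a bucket renders roughly as (the key is emitted by keyToXContent):
        // { "key": "some-term", "doc_count": 42, "doc_count_error_upper_bound": 3 }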
@Override
public final XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
keyToXContent(builder);
builder.field(CommonFields.DOC_COUNT.getPreferredName(), getDocCount());
if (showDocCountError) {
builder.field(InternalTerms.DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME.getPreferredName(), getDocCountError());
}
aggregations.toXContentInternal(builder, params);
builder.endObject();
return builder;
}
protected abstract XContentBuilder keyToXContent(XContentBuilder builder) throws IOException;
@Override
public boolean equals(Object obj) {
if (obj == null || getClass() != obj.getClass()) {
return false;
}
            Bucket<?> that = (Bucket<?>) obj;
// No need to take format and showDocCountError, they are attributes
// of the parent terms aggregation object that are only copied here
// for serialization purposes
return Objects.equals(docCount, that.docCount)
&& Objects.equals(docCountError, that.docCountError)
&& Objects.equals(aggregations, that.aggregations);
}
@Override
public int hashCode() {
return Objects.hash(getClass(), docCount, docCountError, aggregations);
}
}
protected final BucketOrder reduceOrder;
protected final BucketOrder order;
protected final int requiredSize;
protected final long minDocCount;
protected final TermsAggregator.BucketCountThresholds bucketCountThresholds;
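    // Tracks whether a slice-level doc count error was observed while reducing
    // (set when merging concurrent segment search responses).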
private boolean hasSliceLevelDocCountError = false;
/**
* Creates a new {@link InternalTerms}
* @param name The name of the aggregation
* @param reduceOrder The {@link BucketOrder} that should be used to merge shard results.
* @param order The {@link BucketOrder} that should be used to sort the final reduce.
* @param bucketCountThresholds Object containing values for minDocCount, shardMinDocCount, size, shardSize.
* @param metadata The metadata associated with the aggregation.
*/
protected InternalTerms(
String name,
BucketOrder reduceOrder,
BucketOrder order,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
        Map<String, Object> metadata
) {
super(name, metadata);
this.reduceOrder = reduceOrder;
this.order = order;
this.bucketCountThresholds = bucketCountThresholds;
this.requiredSize = bucketCountThresholds.getRequiredSize();
this.minDocCount = bucketCountThresholds.getMinDocCount();
}
/**
* Read from a stream.
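     * Streams written before Legacy ES 7.10 carry a single order that serves as both the reduce
     * order and the sort order.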
*/
protected InternalTerms(StreamInput in) throws IOException {
super(in);
reduceOrder = InternalOrder.Streams.readOrder(in);
if (in.getVersion().onOrAfter(LegacyESVersion.V_7_10_0)) {
order = InternalOrder.Streams.readOrder(in);
} else {
order = reduceOrder;
}
requiredSize = readSize(in);
minDocCount = in.readVLong();
// shardMinDocCount and shardSize are not used on the coordinator, so they are not deserialized. We use
// CoordinatorBucketCountThresholds which will throw an exception if they are accessed.
bucketCountThresholds = new TermsAggregator.CoordinatorBucketCountThresholds(minDocCount, -1, requiredSize, getShardSize());
}
@Override
protected final void doWriteTo(StreamOutput out) throws IOException {
if (out.getVersion().onOrAfter(LegacyESVersion.V_7_10_0)) {
reduceOrder.writeTo(out);
}
order.writeTo(out);
writeSize(requiredSize, out);
out.writeVLong(minDocCount);
writeTermTypeInfoTo(out);
}
protected abstract void writeTermTypeInfoTo(StreamOutput out) throws IOException;
@Override
    public abstract List<B> getBuckets();
@Override
public abstract B getBucketByKey(String term);
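    // Chooses the order used for merging: if every non-empty shard result was produced with the same
    // reduce order, merge on that; otherwise fall back to the requested sort order.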
    private BucketOrder getReduceOrder(List<InternalAggregation> aggregations) {
BucketOrder thisReduceOrder = null;
for (InternalAggregation aggregation : aggregations) {
@SuppressWarnings("unchecked")
            InternalTerms<A, B> terms = (InternalTerms<A, B>) aggregation;
if (terms.getBuckets().size() == 0) {
continue;
}
if (thisReduceOrder == null) {
thisReduceOrder = terms.reduceOrder;
} else if (thisReduceOrder.equals(terms.reduceOrder) == false) {
return order;
}
}
return thisReduceOrder != null ? thisReduceOrder : order;
}
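    // Per-shard doc count error: 0 if the shard returned fewer terms than requested (nothing was cut
    // off) or is ordered by key; under descending count order, the smallest returned doc count bounds
    // any term the shard may have omitted; for other orders the error is unknown (-1).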
    private long getDocCountError(InternalTerms<?, ?> terms, ReduceContext reduceContext) {
int size = terms.getBuckets().size();
if (size == 0 || size < terms.getShardSize() || isKeyOrder(terms.order)) {
return 0;
} else if (InternalOrder.isCountDesc(terms.order)) {
if (terms.getDocCountError() > 0) {
// If there is an existing docCountError for this agg then
// use this as the error for this aggregation
return terms.getDocCountError();
} else {
// otherwise use the doc count of the last term in the
// aggregation
return terms.getBuckets().stream().mapToLong(MultiBucketsAggregation.Bucket::getDocCount).min().getAsLong();
}
} else {
return -1;
}
}
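    // Key-ordered reduction: every shard's buckets arrive sorted by key, so an n-way merge through a
    // priority queue reduces same-key runs incrementally instead of hashing all buckets.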
    private List<B> reduceMergeSort(List<InternalAggregation> aggregations, BucketOrder thisReduceOrder, ReduceContext reduceContext) {
        assert isKeyOrder(thisReduceOrder);
        final Comparator<MultiBucketsAggregation.Bucket> cmp = thisReduceOrder.comparator();
        final PriorityQueue<IteratorAndCurrent<B>> pq = new PriorityQueue<IteratorAndCurrent<B>>(aggregations.size()) {
@Override
            protected boolean lessThan(IteratorAndCurrent<B> a, IteratorAndCurrent<B> b) {
return cmp.compare(a.current(), b.current()) < 0;
}
};
for (InternalAggregation aggregation : aggregations) {
@SuppressWarnings("unchecked")
            InternalTerms<A, B> terms = (InternalTerms<A, B>) aggregation;
if (terms.getBuckets().isEmpty() == false) {
assert reduceOrder.equals(reduceOrder);
                pq.add(new IteratorAndCurrent<>(terms.getBuckets().iterator()));
}
}
        List<B> reducedBuckets = new ArrayList<>();
// list of buckets coming from different shards that have the same key
        List<B> currentBuckets = new ArrayList<>();
B lastBucket = null;
while (pq.size() > 0) {
            final IteratorAndCurrent<B> top = pq.top();
assert lastBucket == null || cmp.compare(top.current(), lastBucket) >= 0;
if (lastBucket != null && cmp.compare(top.current(), lastBucket) != 0) {
// the key changes, reduce what we already buffered and reset the buffer for current buckets
final B reduced = reduceBucket(currentBuckets, reduceContext);
reducedBuckets.add(reduced);
currentBuckets.clear();
}
lastBucket = top.current();
currentBuckets.add(top.current());
if (top.hasNext()) {
top.next();
assert cmp.compare(top.current(), lastBucket) > 0 : "shards must return data sorted by key";
pq.updateTop();
} else {
pq.pop();
}
}
if (currentBuckets.isEmpty() == false) {
final B reduced = reduceBucket(currentBuckets, reduceContext);
reducedBuckets.add(reduced);
}
return reducedBuckets;
}
    private List<B> reduceLegacy(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
        Map<Object, List<B>> bucketMap = new HashMap<>();