org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.apache.servicemix.bundles.elasticsearch
Show all versions of org.apache.servicemix.bundles.elasticsearch
This OSGi bundle wraps ${pkgArtifactId} ${pkgVersion} jar file.
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.logging.log4j.LogManager;
import org.apache.lucene.search.IndexSearcher;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalOrder;
import org.elasticsearch.search.aggregations.InternalOrder.CompoundOrder;
import org.elasticsearch.search.aggregations.NonCollectingAggregator;
import org.elasticsearch.search.aggregations.bucket.BucketUtils;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.List;
import java.util.Map;
public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
private static final DeprecationLogger deprecationLogger = new DeprecationLogger(LogManager.getLogger(TermsAggregatorFactory.class));
static Boolean REMAP_GLOBAL_ORDS, COLLECT_SEGMENT_ORDS;
private final BucketOrder order;
private final IncludeExclude includeExclude;
private final String executionHint;
private final SubAggCollectionMode collectMode;
private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
private final boolean showTermDocCountError;
TermsAggregatorFactory(String name,
ValuesSourceConfig config,
BucketOrder order,
IncludeExclude includeExclude,
String executionHint,
SubAggCollectionMode collectMode,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
boolean showTermDocCountError,
SearchContext context,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder,
Map metaData) throws IOException {
super(name, config, context, parent, subFactoriesBuilder, metaData);
this.order = order;
this.includeExclude = includeExclude;
this.executionHint = executionHint;
this.collectMode = collectMode;
this.bucketCountThresholds = bucketCountThresholds;
this.showTermDocCountError = showTermDocCountError;
}
@Override
protected Aggregator createUnmapped(Aggregator parent, List pipelineAggregators, Map metaData)
throws IOException {
final InternalAggregation aggregation = new UnmappedTerms(name, order, bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(), pipelineAggregators, metaData);
return new NonCollectingAggregator(name, context, parent, factories, pipelineAggregators, metaData) {
{
// even in the case of an unmapped aggregator, validate the
// order
InternalOrder.validate(order, this);
}
@Override
public InternalAggregation buildEmptyAggregation() {
return aggregation;
}
};
}
private static boolean isAggregationSort(BucketOrder order) {
if (order instanceof InternalOrder.Aggregation) {
return true;
} else if (order instanceof InternalOrder.CompoundOrder) {
InternalOrder.CompoundOrder compoundOrder = (CompoundOrder) order;
return compoundOrder.orderElements().stream().anyMatch(TermsAggregatorFactory::isAggregationSort);
} else {
return false;
}
}
@Override
protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket,
List pipelineAggregators, Map metaData) throws IOException {
if (collectsFromSingleBucket == false) {
return asMultiBucketAggregator(this, context, parent);
}
BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds);
if (InternalOrder.isKeyOrder(order) == false
&& bucketCountThresholds.getShardSize() == TermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
// The user has not made a shardSize selection. Use default
// heuristic to avoid any wrong-ranking caused by distributed
// counting
bucketCountThresholds.setShardSize(BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize()));
}
bucketCountThresholds.ensureValidity();
if (valuesSource instanceof ValuesSource.Bytes) {
ExecutionMode execution = null;
if (executionHint != null) {
execution = ExecutionMode.fromString(executionHint, deprecationLogger);
}
// In some cases, using ordinals is just not supported: override it
if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals == false) {
execution = ExecutionMode.MAP;
}
final long maxOrd = execution == ExecutionMode.GLOBAL_ORDINALS ? getMaxOrd(valuesSource, context.searcher()) : -1;
if (execution == null) {
execution = ExecutionMode.GLOBAL_ORDINALS;
}
SubAggCollectionMode cm = collectMode;
if (cm == null) {
cm = SubAggCollectionMode.DEPTH_FIRST;
if (factories != AggregatorFactories.EMPTY) {
cm = subAggCollectionMode(bucketCountThresholds.getShardSize(), maxOrd);
}
}
DocValueFormat format = config.format();
if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style "
+ "include/exclude settings as they can only be applied to string fields. Use an array of values for "
+ "include/exclude clauses");
}
return execution.create(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent,
cm, showTermDocCountError, pipelineAggregators, metaData);
}
if ((includeExclude != null) && (includeExclude.isRegexBased())) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style "
+ "include/exclude settings as they can only be applied to string fields. Use an array of numeric values for "
+ "include/exclude clauses used to filter numeric fields");
}
if (valuesSource instanceof ValuesSource.Numeric) {
IncludeExclude.LongFilter longFilter = null;
SubAggCollectionMode cm = collectMode;
if (cm == null) {
if (factories != AggregatorFactories.EMPTY) {
cm = subAggCollectionMode(bucketCountThresholds.getShardSize(), -1);
} else {
cm = SubAggCollectionMode.DEPTH_FIRST;
}
}
if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
if (includeExclude != null) {
longFilter = includeExclude.convertToDoubleFilter();
}
return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order,
bucketCountThresholds, context, parent, cm, showTermDocCountError, longFilter,
pipelineAggregators, metaData);
}
if (includeExclude != null) {
longFilter = includeExclude.convertToLongFilter(config.format());
}
return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order,
bucketCountThresholds, context, parent, cm, showTermDocCountError, longFilter, pipelineAggregators,
metaData);
}
throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + config.fieldContext().field()
+ "]. It can only be applied to numeric or string fields.");
}
// return the SubAggCollectionMode that this aggregation should use based on the expected size
// and the cardinality of the field
static SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) {
if (expectedSize == Integer.MAX_VALUE) {
// return all buckets
return SubAggCollectionMode.DEPTH_FIRST;
}
if (maxOrd == -1 || maxOrd > expectedSize) {
// use breadth_first if the cardinality is bigger than the expected size or unknown (-1)
return SubAggCollectionMode.BREADTH_FIRST;
}
return SubAggCollectionMode.DEPTH_FIRST;
}
/**
* Get the maximum global ordinal value for the provided {@link ValuesSource} or -1
* if the values source is not an instance of {@link ValuesSource.Bytes.WithOrdinals}.
*/
static long getMaxOrd(ValuesSource source, IndexSearcher searcher) throws IOException {
if (source instanceof ValuesSource.Bytes.WithOrdinals) {
ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) source;
return valueSourceWithOrdinals.globalMaxOrd(searcher);
} else {
return -1;
}
}
public enum ExecutionMode {
MAP(new ParseField("map")) {
@Override
Aggregator create(String name,
AggregatorFactories factories,
ValuesSource valuesSource,
BucketOrder order,
DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude,
SearchContext context,
Aggregator parent,
SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError,
List pipelineAggregators,
Map metaData) throws IOException {
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
return new StringTermsAggregator(name, factories, valuesSource, order, format, bucketCountThresholds, filter,
context, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
}
},
GLOBAL_ORDINALS(new ParseField("global_ordinals")) {
@Override
Aggregator create(String name,
AggregatorFactories factories,
ValuesSource valuesSource,
BucketOrder order,
DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude,
SearchContext context, Aggregator parent,
SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError,
List pipelineAggregators,
Map metaData) throws IOException {
final long maxOrd = getMaxOrd(valuesSource, context.searcher());
assert maxOrd != -1;
final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs());
if (factories == AggregatorFactories.EMPTY &&
includeExclude == null &&
Aggregator.descendsFromBucketAggregator(parent) == false &&
// we use the static COLLECT_SEGMENT_ORDS to allow tests to force specific optimizations
(COLLECT_SEGMENT_ORDS!= null ? COLLECT_SEGMENT_ORDS.booleanValue() : ratio <= 0.5 && maxOrd <= 2048)) {
/**
* We can use the low cardinality execution mode iff this aggregator:
* - has no sub-aggregator AND
* - is not a child of a bucket aggregator AND
* - At least we reduce the number of global ordinals look-ups by half (ration <= 0.5) AND
* - the maximum global ordinal is less than 2048 (LOW_CARDINALITY has additional memory usage,
* which directly linked to maxOrd, so we need to limit).
*/
return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories,
(ValuesSource.Bytes.WithOrdinals) valuesSource, order, format, bucketCountThresholds, context, parent, false,
subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
}
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
boolean remapGlobalOrds;
if (REMAP_GLOBAL_ORDS != null) {
// We use REMAP_GLOBAL_ORDS to allow tests to force specific optimizations
remapGlobalOrds = REMAP_GLOBAL_ORDS.booleanValue();
} else {
remapGlobalOrds = true;
if (includeExclude == null &&
Aggregator.descendsFromBucketAggregator(parent) == false &&
(factories == AggregatorFactories.EMPTY ||
(isAggregationSort(order) == false && subAggCollectMode == SubAggCollectionMode.BREADTH_FIRST))) {
/**
* We don't need to remap global ords iff this aggregator:
* - has no include/exclude rules AND
* - is not a child of a bucket aggregator AND
* - has no sub-aggregator or only sub-aggregator that can be deferred
* ({@link SubAggCollectionMode#BREADTH_FIRST}).
**/
remapGlobalOrds = false;
}
}
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
format, bucketCountThresholds, filter, context, parent, remapGlobalOrds, subAggCollectMode, showTermDocCountError,
pipelineAggregators, metaData);
}
};
public static ExecutionMode fromString(String value, final DeprecationLogger deprecationLogger) {
switch (value) {
case "global_ordinals":
return GLOBAL_ORDINALS;
case "map":
return MAP;
default:
throw new IllegalArgumentException("Unknown `execution_hint`: [" + value + "], expected any of [map, global_ordinals]");
}
}
private final ParseField parseField;
ExecutionMode(ParseField parseField) {
this.parseField = parseField;
}
abstract Aggregator create(String name,
AggregatorFactories factories,
ValuesSource valuesSource,
BucketOrder order,
DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude,
SearchContext context,
Aggregator parent,
SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError,
List pipelineAggregators,
Map metaData) throws IOException;
@Override
public String toString() {
return parseField.getPreferredName();
}
}
}