All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.search.aggregations.support.ValuesSourceConfig Maven / Gradle / Ivy

There is a newer version: 8.16.0
Show newest version
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */
package org.elasticsearch.search.aggregations.support;

import org.elasticsearch.common.Rounding;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexGeoPointFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.RangeFieldMapper;
import org.elasticsearch.script.AggregationScript;
import org.elasticsearch.script.Script;
import org.elasticsearch.search.DocValueFormat;

import java.io.IOException;
import java.time.ZoneId;
import java.util.function.DoubleUnaryOperator;
import java.util.function.Function;
import java.util.function.LongSupplier;

/**
 * A configuration that tells aggregations how to retrieve data from the index
 * in order to run a specific aggregation.
 */
public class ValuesSourceConfig {

    /**
     * Registry-based entry point: determines the {@link ValuesSourceType} for this aggregation run and wraps the outcome in a new
     * {@link ValuesSourceConfig}.  For mapped fields the type is read from the field's index field data.
     *
     * @param context - the query context
     * @param userValueTypeHint - User specified value type; used for missing values and scripts
     * @param field - The field being aggregated over.  At least one of field and script must not be null
     * @param script - The script the user specified.  At least one of field and script must not be null
     * @param missing - A user specified value to apply when the field is missing.  Should be of type userValueTypeHint
     * @param timeZone - Used to generate a format for dates
     * @param format - The format string to apply to this field.  Note that it drives input parsing as well as output formatting,
     *               see https://github.com/elastic/elasticsearch/issues/47469
     * @param defaultValueSourceType - per-aggregation {@link ValuesSource} of last resort.
     * @return - An initialized {@link ValuesSourceConfig} that will yield the appropriate {@link ValuesSourceType}
     */
    public static ValuesSourceConfig resolve(
        AggregationContext context,
        ValueType userValueTypeHint,
        String field,
        Script script,
        Object missing,
        ZoneId timeZone,
        String format,
        ValuesSourceType defaultValueSourceType
    ) {
        // The only difference from resolveUnregistered is which strategy maps a field to its values source type.
        final FieldResolver resolver = ValuesSourceConfig::getMappingFromRegistry;
        return internalResolve(context, userValueTypeHint, field, script, missing, timeZone, format, defaultValueSourceType, resolver);
    }

    /**
     * Legacy entry point.  Aggregations that are not supported by the {@link ValuesSourceRegistry} call this to get the pre-registry
     * decision logic for the {@link ValuesSourceType}.  New aggregations extending {@link ValuesSourceAggregationBuilder} should use
     * {@link ValuesSourceConfig#resolve} instead.
     *
     * @param context - the query context
     * @param userValueTypeHint - User specified value type; used for missing values and scripts
     * @param field - The field being aggregated over.  At least one of field and script must not be null
     * @param script - The script the user specified.  At least one of field and script must not be null
     * @param missing - A user specified value to apply when the field is missing.  Should be of type userValueTypeHint
     * @param timeZone - Used to generate a format for dates
     * @param format - The format string to apply to this field.  Note that it drives input parsing as well as output formatting,
     *               see https://github.com/elastic/elasticsearch/issues/47469
     * @param defaultValueSourceType - per-aggregation {@link ValuesSource} of last resort.
     * @return - An initialized {@link ValuesSourceConfig} that will yield the appropriate {@link ValuesSourceType}
     */
    public static ValuesSourceConfig resolveUnregistered(
        AggregationContext context,
        ValueType userValueTypeHint,
        String field,
        Script script,
        Object missing,
        ZoneId timeZone,
        String format,
        ValuesSourceType defaultValueSourceType
    ) {
        // Same pipeline as resolve, but the field-to-type mapping uses the legacy instanceof-based rules.
        final FieldResolver resolver = ValuesSourceConfig::getLegacyMapping;
        return internalResolve(context, userValueTypeHint, field, script, missing, timeZone, format, defaultValueSourceType, resolver);
    }

    /**
     * Shared implementation behind {@link #resolve} and {@link #resolveUnregistered}.
     * <p>
     * The values source type is chosen in this order: the user's type hint, then (for a mapped field) whatever
     * {@code fieldResolver} reports, then {@code defaultValueSourceType}.
     *
     * @param fieldResolver strategy that maps a mapped field to its {@link ValuesSourceType}
     * @return the assembled config
     * @throws IllegalStateException if both {@code field} and {@code script} are null
     * @throws IllegalArgumentException if the user's type hint disagrees with the mapped field's type and no script was given
     */
    private static ValuesSourceConfig internalResolve(
        AggregationContext context,
        ValueType userValueTypeHint,
        String field,
        Script script,
        Object missing,
        ZoneId timeZone,
        String format,
        ValuesSourceType defaultValueSourceType,
        FieldResolver fieldResolver
    ) {
        // Compiled up front; stays null when no script was supplied.
        final AggregationScript.LeafFactory compiledScript = createScript(script, context);

        // A user supplied type hint always wins over whatever the mapping says.
        ValuesSourceType resolvedType = userValueTypeHint == null ? null : userValueTypeHint.getValuesSourceType();

        if (field == null && script == null) {
            throw new IllegalStateException("value source config is invalid; must have either a field or a script");
        }

        final FieldContext fieldContext = field == null ? null : context.buildFieldContext(field);

        /*
         * Unmapped field: the user named a field that this index does not have, possibly because of a wildcard index pattern.  The
         * config will then use the EMPTY variant of the ValuesSource, optionally with the user's missing value applied.
         */
        final boolean unmapped = field != null && fieldContext == null;

        if (fieldContext != null) {
            final ValuesSourceType mappedType = fieldResolver.getValuesSourceType(fieldContext, userValueTypeHint, defaultValueSourceType);
            if (resolvedType == null) {
                // No hint from the user, so the field decides.
                resolvedType = mappedType;
            } else if (resolvedType != mappedType && script == null) {
                /*
                 * The user asked for one type but the field is mapped as another.  This is usually a mapping error, e.g. an older
                 * index that mapped an IP address as a keyword.  Letting the aggregation run would usually break during reduction
                 * and return nothing, so fail this shard now; correctly mapped shards can still answer with a partial failure.
                 *
                 * When a script is present we assume it adapts the field to the requested type and let the aggregation continue.
                 */
                throw new IllegalArgumentException(
                    "Field type [" + fieldContext.getTypeName() + "] is incompatible with specified value_type [" + userValueTypeHint + "]"
                );
            }
        }

        if (resolvedType == null) {
            resolvedType = defaultValueSourceType;
        }

        final DocValueFormat docValueFormat = resolveFormat(format, resolvedType, timeZone, fieldContext);
        return new ValuesSourceConfig(
            resolvedType,
            fieldContext,
            unmapped,
            // Value scripts are not allowed on unmapped fields, so the compiled script is dropped in that case.
            unmapped ? null : compiledScript,
            userValueTypeHint,
            missing,
            docValueFormat,
            context::nowInMillis
        );
    }

    /**
     * Strategy used by {@code internalResolve} to turn a mapped field into a {@link ValuesSourceType}.  The two implementations in
     * this class are {@code getMappingFromRegistry} and {@code getLegacyMapping}.
     */
    @FunctionalInterface
    private interface FieldResolver {
        /**
         * @param field the mapped field being aggregated
         * @param typeHint the user's value type hint, may be null
         * @param fallbackType the aggregation's type of last resort
         * @return the values source type to use for {@code field}
         */
        ValuesSourceType getValuesSourceType(FieldContext field, ValueType typeHint, ValuesSourceType fallbackType);
    }

    /**
     * {@link FieldResolver} that uses the type reported by the field's index field data.  The hint and the default are accepted only
     * to satisfy the {@link FieldResolver} signature and are ignored.
     */
    private static ValuesSourceType getMappingFromRegistry(
        FieldContext fieldContext,
        ValueType userValueTypeHint,
        ValuesSourceType defaultValuesSourceType
    ) {
        final ValuesSourceType typeFromFieldData = fieldContext.indexFieldData().getValuesSourceType();
        return typeFromFieldData;
    }

    /**
     * {@link FieldResolver} implementing the pre-registry rules: numeric and geo-point field data and range field types map to their
     * core values source types; anything else falls back to the user's hint, or to the default when there is no hint.
     */
    private static ValuesSourceType getLegacyMapping(
        FieldContext fieldContext,
        ValueType userValueTypeHint,
        ValuesSourceType defaultValuesSourceType
    ) {
        final IndexFieldData fieldData = fieldContext.indexFieldData();
        if (fieldData instanceof IndexNumericFieldData) {
            return CoreValuesSourceType.NUMERIC;
        }
        if (fieldData instanceof IndexGeoPointFieldData) {
            return CoreValuesSourceType.GEOPOINT;
        }
        if (fieldContext.fieldType() instanceof RangeFieldMapper.RangeFieldType) {
            return CoreValuesSourceType.RANGE;
        }
        // None of the recognised kinds: defer to the user, then to the aggregation's default.
        return userValueTypeHint == null ? defaultValuesSourceType : userValueTypeHint.getValuesSourceType();
    }

    /**
     * Compiles {@code script} in the aggregation script context and binds it to the script's parameters and the context's lookup.
     *
     * @return the leaf factory, or {@code null} when {@code script} is {@code null}
     */
    private static AggregationScript.LeafFactory createScript(Script script, AggregationContext context) {
        if (script == null) {
            return null;
        }
        return context.compile(script, AggregationScript.CONTEXT).newFactory(script.getParams(), context.lookup());
    }

    /**
     * Chooses the {@link DocValueFormat}: a mapped field's own field type builds it, otherwise (script or unmapped field) the values
     * source type supplies the formatter.
     */
    private static DocValueFormat resolveFormat(
        @Nullable String format,
        @Nullable ValuesSourceType valuesSourceType,
        @Nullable ZoneId tz,
        @Nullable FieldContext fieldContext
    ) {
        if (fieldContext == null) {
            // Script or Unmapped case
            return valuesSourceType.getFormatter(format, tz);
        }
        final MappedFieldType mappedType = fieldContext.fieldType();
        return mappedType.docValueFormat(format, tz);
    }

    /**
     * Special purpose factory for aggregations that work out their field themselves instead of taking it from the user.  The Parent
     * and Child join aggregations are an example: they use the join relation to find the field they read from.
     * <p>
     * The resulting config has no script, no script value type, no missing value and no explicit format.
     */
    public static ValuesSourceConfig resolveFieldOnly(MappedFieldType fieldType, AggregationContext context) {
        final FieldContext resolvedField = context.buildFieldContext(fieldType);
        final ValuesSourceType typeFromFieldData = resolvedField.indexFieldData().getValuesSourceType();
        return new ValuesSourceConfig(
            typeFromFieldData,
            resolvedField,
            false, // mapped
            null,  // script
            null,  // scriptValueType
            null,  // missing
            null,  // format
            context::nowInMillis
        );
    }

    /**
     * Shortcut for building a config for an unmapped field: no field context, script, missing value or explicit format.
     */
    public static ValuesSourceConfig resolveUnmapped(ValuesSourceType valuesSourceType, AggregationContext context) {
        final LongSupplier clock = context::nowInMillis;
        return new ValuesSourceConfig(
            valuesSourceType,
            null,  // fieldContext
            true,  // unmapped
            null,  // script
            null,  // scriptValueType
            null,  // missing
            null,  // format
            clock
        );
    }

    // Type that builds the concrete ValuesSource (empty, script, field, or missing-replaced) in constructValuesSource.
    private final ValuesSourceType valuesSourceType;
    // The mapped field being read; null for script-only and unmapped configs.
    private final FieldContext fieldContext;
    // Compiled script, or null when no script applies.
    private final AggregationScript.LeafFactory script;
    // Value type handed to ValuesSourceType#getScript for script-only configs.
    private final ValueType scriptValueType;
    // True when the config was created for a field with no mapping; the constructor rejects unmapped combined with a field context.
    private final boolean unmapped;
    // Never null: the constructor substitutes DocValueFormat.RAW for a null format.
    private final DocValueFormat format;
    // User supplied replacement for missing values, or null for none.
    private final Object missing;
    // Built once, in the constructor, by constructValuesSource.
    private final ValuesSource valuesSource;

    /**
     * Creates the config and eagerly builds the {@link ValuesSource} it describes.
     *
     * @param valuesSourceType type used to build the values source
     * @param fieldContext the mapped field, or {@code null} for script-only or unmapped configs
     * @param unmapped whether the requested field has no mapping; must not be combined with a non-null {@code fieldContext}
     * @param script compiled script, or {@code null}
     * @param scriptValueType value type passed along when building a script-only values source
     * @param missing replacement for missing values, or {@code null} for none
     * @param format format for the values; {@code null} is treated as {@link DocValueFormat#RAW}
     * @param nowInMillis clock forwarded to the values source type when a missing value is applied
     * @throws IllegalStateException if {@code unmapped} is set together with a field context, or if the config has neither a field
     *                               context nor a script and is not marked unmapped
     */
    @SuppressWarnings("this-escape")
    public ValuesSourceConfig(
        ValuesSourceType valuesSourceType,
        FieldContext fieldContext,
        boolean unmapped,
        AggregationScript.LeafFactory script,
        ValueType scriptValueType,
        Object missing,
        DocValueFormat format,
        LongSupplier nowInMillis
    ) {
        if (unmapped && fieldContext != null) {
            throw new IllegalStateException("value source config is invalid; marked as unmapped but specified a mapped field");
        }
        this.valuesSourceType = valuesSourceType;
        this.fieldContext = fieldContext;
        this.unmapped = unmapped;
        this.script = script;
        this.scriptValueType = scriptValueType;
        this.missing = missing;
        this.format = format == null ? DocValueFormat.RAW : format;

        if (valid() == false) {
            // TODO: resolve no longer generates invalid configs. Once VSConfig is immutable, we can drop this check
            throw new IllegalStateException(
                "value source config is invalid; must have either a field context or a script or marked as unmapped"
            );
        }
        // Hand over the defaulted this.format, not the raw parameter: a null format together with a non-null missing value would
        // otherwise reach replaceMissing as null even though format() reports RAW.
        valuesSource = constructValuesSource(missing, this.format, nowInMillis);
    }

    /**
     * Builds the {@link ValuesSource} for this config; called from the constructor once the fields are assigned.
     * <p>
     * The base source is the empty variant for unmapped configs, a field source (optionally with a value script) when a field
     * context exists, and a pure script source otherwise.  When a missing value is configured the base source is wrapped so that
     * the missing value is substituted.
     */
    private ValuesSource constructValuesSource(Object missing, DocValueFormat format, LongSupplier nowInMillis) {
        final ValuesSource base;
        if (this.unmapped) {
            base = valueSourceType().getEmpty();
        } else if (fieldContext() != null) {
            // Field or Value Script case
            base = valueSourceType().getField(fieldContext(), script());
        } else {
            // Script case
            base = valueSourceType().getScript(script(), scriptValueType());
        }

        return missing() == null ? base : valueSourceType().replaceMissing(base, missing, format, nowInMillis);
    }

    /**
     * @return the {@link ValuesSourceType} this config was created with
     */
    public ValuesSourceType valueSourceType() {
        return this.valuesSourceType;
    }

    /**
     * @return the context of the mapped field, or {@code null} when this config has no mapped field
     */
    public FieldContext fieldContext() {
        return this.fieldContext;
    }

    /**
     * Provides a function that maps a double to the value it would have had if the field's mapper had parsed it and the result had
     * then been widened to a double.  Used to correct precision errors.  For anything other than a mapped number field the function
     * returns its input unchanged.
     */
    public DoubleUnaryOperator reduceToStoredPrecisionFunction() {
        final boolean numericField = fieldContext() != null && fieldType() instanceof NumberFieldMapper.NumberFieldType;
        if (numericField == false) {
            return DoubleUnaryOperator.identity();
        }
        final NumberFieldMapper.NumberFieldType numberType = (NumberFieldMapper.NumberFieldType) fieldType();
        return numberType::reduceToStoredPrecision;
    }

    /**
     * Shortcut to the mapped field type behind this values source.
     *
     * @return the field type, or {@code null} when there is no field context
     */
    @Nullable
    public MappedFieldType fieldType() {
        if (fieldContext == null) {
            return null;
        }
        return fieldContext.fieldType();
    }

    /**
     * @return the compiled script, or {@code null} when this config has none
     */
    public AggregationScript.LeafFactory script() {
        return this.script;
    }

    /**
     * Tells whether the values source described by this config is able to produce values.  It cannot when, for example, the
     * requested field does not exist on this index and neither a script nor a missing value was supplied.
     */
    public boolean hasValues() {
        final boolean nothingToRead = fieldContext == null && script == null && missing == null;
        return nothingToRead == false;
    }

    /**
     * @return {@code true} when the config has a field context or a script, or is marked as unmapped
     */
    public boolean valid() {
        if (unmapped) {
            return true;
        }
        return fieldContext != null || script != null;
    }

    /**
     * @return the value type passed to the constructor for script values; may be {@code null}
     */
    public ValueType scriptValueType() {
        return scriptValueType;
    }

    /**
     * @return the replacement for missing values, or {@code null} when none was configured
     */
    public Object missing() {
        return missing;
    }

    /**
     * @return the format for this config's values; never {@code null}, because the constructor falls back to
     *         {@link DocValueFormat#RAW}
     */
    public DocValueFormat format() {
        return this.format;
    }

    /**
     * @return the {@link ValuesSource} that was built when this config was constructed
     */
    public ValuesSource getValuesSource() {
        return this.valuesSource;
    }

    /**
     * Build a function prepares rounding values to be called many times.
     * 

* This returns a {@linkplain Function} because auto date histogram will * need to call it many times over the course of running the aggregation. */ public Function roundingPreparer(AggregationContext context) throws IOException { return valuesSource.roundingPreparer(context); } /** * Check if this values source supports segment ordinals. Global ordinals might or might not be supported. *

* If this returns {@code true} then it is safe to cast it to {@link ValuesSource.Bytes.WithOrdinals}. * Call {@link ValuesSource.Bytes.WithOrdinals#supportsGlobalOrdinalsMapping} to find out if global ordinals are supported. * */ public boolean hasOrdinals() { return valuesSource.hasOrdinals(); } /** * This method is used when an aggregation can optimize by using the indexed data instead of the doc values. We check to see if the * indexed data will match the values source output (meaning there isn't a script or a missing value, since both could modify the * value at read time). If the settings allow for it, we then ask the {@link ValuesSourceType} to build the actual point reader * based on the field type. This allows for a point of extensibility in plugins. * * @return null if we cannot apply the optimization, otherwise the point reader function. */ @Nullable public Function getPointReaderOrNull() { return alignesWithSearchIndex() ? fieldType().pointReaderIfPossible() : null; } /** * Do {@link ValuesSource}s built by this config line up with the search * index of the underlying field? This'll only return true if the fields * is searchable and there aren't missing values or a script to confuse * the ordering. */ public boolean alignesWithSearchIndex() { return script() == null && missing() == null && fieldType() != null && fieldType().isIndexed(); } /** * Returns a human readable description of this values source, for use in error messages and similar. */ public String getDescription() { if (script != null) { return "Script yielding [" + (scriptValueType != null ? 
scriptValueType.getPreferredName() : "unknown type") + "]"; } MappedFieldType fieldType = fieldType(); if (fieldType != null) { String typeName = fieldType.typeName(); String valuesSourceTypeName = valuesSourceType.typeName(); if (valuesSourceType instanceof TimeSeriesValuesSourceType) { return "Field [" + fieldType.name() + "] of type [" + typeName + "][" + valuesSourceTypeName + "]"; } else { // Avoid repeated names. Currently only time series values source types have a different behaviour/validation. return "Field [" + fieldType.name() + "] of type [" + typeName + "]"; } } return "unmapped field with value source type [" + valuesSourceType.typeName() + "]"; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy