org.elasticsearch.search.fetch.subphase.FieldFetcher Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Elasticsearch - Open Source, Distributed, RESTful Search Engine
There is a newer version: 8.16.0
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.search.fetch.subphase;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NestedValueFetcher;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * A helper class to {@link FetchFieldsPhase} that's initialized with a list of field patterns to fetch.
 * Then given a specific document, it can retrieve the corresponding fields from the document's source.
 */
public class FieldFetcher {

    /**
     * Default maximum number of states in the automaton that looks up unmapped fields.
     * <p>
     * The value is handed as the second constructor argument to the {@link CharacterRunAutomaton} that is built
     * from the wildcard patterns requested with {@code include_unmapped}.
     */
    private static final int AUTOMATON_MAX_DETERMINIZED_STATES = 100000;

    /**
     * Creates a {@link FieldFetcher} for the requested field patterns, scoped to the root of the document.
     * <p>
     * All nested mapping paths known to the context's nested lookup are put in scope, and the nested scope path
     * is the empty string so that no mapped field is filtered out by its prefix.
     *
     * @param context         the search execution context used to resolve field names, field types and nested mappings
     * @param fieldAndFormats the field patterns to fetch, each with its optional format
     * @return a fetcher covering every requested pattern
     */
    public static FieldFetcher create(SearchExecutionContext context, Collection<FieldAndFormat> fieldAndFormats) {
        Set<String> nestedMappingPaths = context.nestedLookup().getNestedMappers().keySet();
        return create(context, fieldAndFormats, nestedMappingPaths, "");
    }

    /**
     * Creates a {@link FieldFetcher} limited to the mapped fields whose name starts with {@code nestedScopePath}.
     * <p>
     * Every mapped field that matches a requested pattern gets its own {@link ValueFetcher}, unless it lies beneath
     * one of the top-most nested paths in scope. Each of those nested paths is served by a single
     * {@link NestedValueFetcher} that wraps a recursively created {@link FieldFetcher} scoped to that path.
     * Patterns requested with {@code include_unmapped} are split into concrete names and wildcard patterns; the
     * wildcard patterns are compiled into one lookup automaton.
     *
     * @param context               the search execution context used to resolve field names and field types
     * @param fieldAndFormats       the field patterns to fetch, each with its optional format
     * @param nestedMappingsInScope the nested mapping paths that still have to be handled at this level or below
     * @param nestedScopePath       prefix a mapped field name must start with to be considered; empty at the root
     * @return a fetcher for the fields in scope
     * @throws IllegalArgumentException if the value fetcher for a matching field cannot be created; the message
     *                                  names the field and, for wildcard patterns, the pattern that matched it
     */
    private static FieldFetcher create(
        SearchExecutionContext context,
        Collection<FieldAndFormat> fieldAndFormats,
        Set<String> nestedMappingsInScope,
        String nestedScopePath
    ) {
        // Here we only need the nested paths that are closest to the root, e.g. only "foo" if "foo.bar" is also present.
        // The remaining nested field paths are handled recursively.
        Set<String> nestedParentPaths = getParentPaths(nestedMappingsInScope, context);

        // Using a LinkedHashMap so fields are returned in the order requested.
        // We won't formally guarantee this, but it's good for readability of the response.
        Map<String, FieldContext> fieldContexts = new LinkedHashMap<>();
        List<String> unmappedFetchPattern = new ArrayList<>();

        for (FieldAndFormat fieldAndFormat : fieldAndFormats) {
            String fieldPattern = fieldAndFormat.field;
            boolean isWildcardPattern = Regex.isSimpleMatchPattern(fieldPattern);
            if (Boolean.TRUE.equals(fieldAndFormat.includeUnmapped)) {
                unmappedFetchPattern.add(fieldPattern);
            }

            for (String field : context.getMatchingFieldNames(fieldPattern)) {
                MappedFieldType ft = context.getFieldType(field);
                // we want to skip metadata fields if we have a wildcard pattern
                if (context.isMetadataField(field) && isWildcardPattern) {
                    continue;
                }
                // NOTE(review): this is a plain prefix test without a '.' boundary check, so with scope "foo" a
                // sibling field such as "foobar" also counts as in scope - confirm this is intended.
                if (field.startsWith(nestedScopePath) == false) {
                    // this field is out of scope for this FieldFetcher (it's likely nested) so ignore
                    continue;
                }

                // A field lies beneath a nested parent when the parent path is followed by a '.' in the field name.
                boolean beneathNestedParent = false;
                for (String nestedFieldPath : nestedParentPaths) {
                    if (field.startsWith(nestedFieldPath)
                        && field.length() > nestedFieldPath.length()
                        && field.charAt(nestedFieldPath.length()) == '.') {
                        beneathNestedParent = true;
                        break;
                    }
                }
                if (beneathNestedParent) {
                    // fields beneath a known nested field are collected by that field's NestedValueFetcher below
                    continue;
                }

                ValueFetcher valueFetcher;
                try {
                    valueFetcher = ft.valueFetcher(context, fieldAndFormat.format);
                } catch (IllegalArgumentException e) {
                    // Re-throw with the concrete field (and the pattern that matched it) so the caller can tell
                    // which requested entry is at fault; the original exception is kept as the cause.
                    StringBuilder error = new StringBuilder("error fetching [").append(field).append(']');
                    if (isWildcardPattern) {
                        error.append(" which matched [").append(fieldPattern).append(']');
                    }
                    error.append(": ").append(e.getMessage());
                    throw new IllegalArgumentException(error.toString(), e);
                }
                fieldContexts.put(field, new FieldContext(field, valueFetcher));
            }
        }

        // The nested paths handled at this level are removed from the scope that is passed down, so the recursion
        // below cannot pick them up again and loop forever. Paths beneath them stay in scope for the sub-fetchers.
        // The narrowed set does not depend on the individual nested path, so it is computed once and shared; the
        // recursive calls only read it (assuming getParentPaths does not modify its argument).
        Set<String> narrowedScopeNestedMappings = nestedMappingsInScope.stream()
            .filter(s -> nestedParentPaths.contains(s) == false)
            .collect(Collectors.toSet());

        // create a new nested value fetcher for patterns under each nested field
        for (String nestedFieldPath : nestedParentPaths) {
            // the sub-fetcher narrows the allowed lookup scope to everything beneath its nested field path
            FieldFetcher nestedSubFieldFetcher = FieldFetcher.create(
                context,
                fieldAndFormats,
                narrowedScopeNestedMappings,
                nestedFieldPath
            );

            // add a special ValueFetcher that filters source and collects its subfields
            fieldContexts.put(
                nestedFieldPath,
                new FieldContext(nestedFieldPath, new NestedValueFetcher(nestedFieldPath, nestedSubFieldFetcher))
            );
        }

        // We separate the "include_unmapped" field patterns with wildcards from the rest in order to use less
        // space in the lookup automaton.
        Map<Boolean, List<String>> partitions = unmappedFetchPattern.stream()
            .collect(Collectors.partitioningBy(s -> Regex.isSimpleMatchPattern(s)));
        List<String> unmappedWildcardPattern = partitions.get(true);
        List<String> unmappedConcreteFields = partitions.get(false);

        // The automaton stays null when no wildcard pattern asked for unmapped fields.
        CharacterRunAutomaton unmappedFieldsFetchAutomaton = null;
        if (unmappedWildcardPattern.isEmpty() == false) {
            unmappedFieldsFetchAutomaton = new CharacterRunAutomaton(
                Regex.simpleMatchToAutomaton(unmappedWildcardPattern.toArray(new String[0])),
                AUTOMATON_MAX_DETERMINIZED_STATES
            );
        }
        return new FieldFetcher(fieldContexts, unmappedFieldsFetchAutomaton, unmappedConcreteFields);
    }

    /** Per-field fetch contexts keyed by field name (or nested path), as populated by {@code create}. */
    private final Map<String, FieldContext> fieldContexts;
    /** Automaton built from the wildcard {@code include_unmapped} patterns; {@code null} when there are none. */
    private final CharacterRunAutomaton unmappedFieldsFetchAutomaton;
    /** The {@code include_unmapped} patterns that contain no wildcard; declared nullable by the constructor. */
    private final List<String> unmappedConcreteFields;

    /**
     * Stores the pre-computed lookup structures; instances are obtained through the static {@code create} methods.
     * The arguments are kept by reference, not copied.
     *
     * @param fieldContexts                the fetch contexts to evaluate for each document
     * @param unmappedFieldsFetchAutomaton automaton for wildcard {@code include_unmapped} patterns, or {@code null}
     * @param unmappedConcreteFields       the {@code include_unmapped} patterns without wildcards, or {@code null}
     */
    private FieldFetcher(
        Map<String, FieldContext> fieldContexts,
        @Nullable CharacterRunAutomaton unmappedFieldsFetchAutomaton,
        @Nullable List<String> unmappedConcreteFields
    ) {
        this.fieldContexts = fieldContexts;
        this.unmappedFieldsFetchAutomaton = unmappedFieldsFetchAutomaton;
        this.unmappedConcreteFields = unmappedConcreteFields;
    }

    public Map fetch(SourceLookup sourceLookup) throws IOException {
        Map documentFields = new HashMap<>();
        for (FieldContext context : fieldContexts.values()) {
            String field = context.fieldName;

            ValueFetcher valueFetcher = context.valueFetcher;
            List