org.elasticsearch.search.lookup.SourceLookup Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
There is a newer version: 8.14.0
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */
package org.elasticsearch.search.lookup;

import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.CheckedBiConsumer;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.core.MemoizedSupplier;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Supplier;

import static java.util.Collections.emptyMap;

public class SourceLookup implements Map {

    /** Leaf reader of the segment this lookup was last positioned on; {@code null} until a segment has been set. */
    private LeafReader reader;
    /** Loads the stored fields of a document, identified by its doc id, into a {@link FieldsVisitor}. */
    private CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReader;

    /** Doc id of the document this lookup is positioned on; {@code -1} until a document has been set. */
    private int docId = -1;

    /** Source bytes supplied through {@code setSource(BytesReference)}; cleared when the lookup moves to another document. */
    private BytesReference sourceAsBytes;
    /** Parsed source of the current document; {@code null} until parsed or explicitly set, cleared on document change. */
    private Map<String, Object> source;
    /** Content type of the source; updated whenever the source is parsed or when set explicitly. */
    private XContentType sourceContentType;

    /**
     * Returns the content type currently recorded for the source.
     *
     * @return the content type, or {@code null} if none has been recorded
     *         (it is assigned when the source is parsed or via {@code setSourceContentType})
     */
    public XContentType sourceContentType() {
        return sourceContentType;
    }

    /**
     * Returns the doc id this lookup is currently positioned on.
     *
     * @return the current doc id, or {@code -1} if no document has been set yet
     */
    public int docId() {
        return docId;
    }

    /**
     * Return the source as a map that will be unchanged when the lookup
     * moves to a different document.
     * <p>
     * Important: This can lose precision on numbers with a decimal point. It
     * converts numbers like {@code "n": 1234.567} to a {@code double} which
     * only has 52 bits of precision in the mantissa. This will come up most
     * frequently when folks write nanosecond precision dates as a decimal
     * number.
     * <p>
     * Resolution order: the already parsed map if present, otherwise the
     * explicitly supplied source bytes, otherwise the stored fields of the
     * current document. The result is cached until the lookup moves to
     * another document.
     *
     * @return the parsed source; an empty map if the stored fields contain no source
     * @throws ElasticsearchParseException if the source cannot be loaded or parsed
     */
    public Map<String, Object> source() {
        if (source != null) {
            return source;
        }
        if (sourceAsBytes != null) {
            Tuple<XContentType, Map<String, Object>> tuple = sourceAsMapAndType(sourceAsBytes);
            sourceContentType = tuple.v1();
            source = tuple.v2();
            return source;
        }
        try {
            FieldsVisitor sourceFieldVisitor = new FieldsVisitor(true);
            fieldReader.accept(docId, sourceFieldVisitor);
            // Named differently from the field so the cached map is not shadowed by the raw bytes.
            BytesReference loadedSource = sourceFieldVisitor.source();
            if (loadedSource == null) {
                this.source = emptyMap();
                this.sourceContentType = null;
            } else {
                Tuple<XContentType, Map<String, Object>> tuple = sourceAsMapAndType(loadedSource);
                this.sourceContentType = tuple.v1();
                this.source = tuple.v2();
            }
        } catch (Exception e) {
            throw new ElasticsearchParseException("failed to parse / load source", e);
        }
        return this.source;
    }

    private static Tuple> sourceAsMapAndType(BytesReference source) throws ElasticsearchParseException {
        return XContentHelper.convertToMap(source, false);
    }

    /**
     * Get the source as a {@link Map} of java objects.
     * <p>
     * Important: This can lose precision on numbers with a decimal point. It
     * converts numbers like {@code "n": 1234.567} to a {@code double} which
     * only has 52 bits of precision in the mantissa. This will come up most
     * frequently when folks write nanosecond precision dates as a decimal
     * number.
     *
     * @param source the source bytes to parse
     * @return the parsed source map
     * @throws ElasticsearchParseException if the bytes cannot be parsed
     */
    public static Map<String, Object> sourceAsMap(BytesReference source) throws ElasticsearchParseException {
        return sourceAsMapAndType(source).v2();
    }

    /**
     * Positions this lookup on the given document, dropping any source cached
     * for the previous document. Calling it again with the same reader and
     * doc id is a no-op, so an already loaded source is kept.
     *
     * @param context leaf context whose reader holds the document
     * @param docId   doc id handed to the stored fields reader (presumably segment-local; confirm with callers)
     */
    public void setSegmentAndDocument(LeafReaderContext context, int docId) {
        final LeafReader leafReader = context.reader();

        // if we are called with the same document, don't invalidate source
        if (this.reader == leafReader && this.docId == docId) {
            return;
        }

        // only reset reader and fieldReader when reader changes
        if (this.reader != leafReader) {
            this.reader = leafReader;

            // All the docs to fetch are adjacent but Lucene stored fields are optimized
            // for random access and don't optimize for sequential access - except for merging.
            // So we do a little hack here and pretend we're going to do merges in order to
            // get better sequential access.
            if (leafReader instanceof SequentialStoredFieldsLeafReader lf) {
                // Avoid eagerly loading the stored fields reader, since this can be expensive
                Supplier<StoredFieldsReader> supplier = new MemoizedSupplier<>(lf::getSequentialStoredFieldsReader);
                fieldReader = (d, v) -> supplier.get().visitDocument(d, v);
            } else {
                fieldReader = leafReader::document;
            }
        }
        this.source = null;
        this.sourceAsBytes = null;
        this.docId = docId;
    }

    /**
     * Supplies the source of the current document as bytes.
     * The bytes are not parsed here. An already parsed map is not cleared by
     * this call, and {@code source()} returns that map first when one is present.
     *
     * @param source the source bytes to use for the current document
     */
    public void setSource(BytesReference source) {
        this.sourceAsBytes = source;
    }

    /**
     * Records the content type of the source explicitly.
     * The value is overwritten the next time the source is parsed by {@code source()}.
     *
     * @param sourceContentType the content type to record
     */
    public void setSourceContentType(XContentType sourceContentType) {
        this.sourceContentType = sourceContentType;
    }

    /**
     * Supplies the already parsed source of the current document.
     * Once set, {@code source()} returns this map without loading or parsing anything.
     * The recorded content type is left untouched.
     *
     * @param source the parsed source map to use for the current document
     */
    public void setSource(Map source) {
        this.source = source;
    }

    /**
     * Internal source representation, might be compressed.
     * Only bytes supplied through {@code setSource(BytesReference)} are returned here;
     * a source that {@code source()} loaded from stored fields is not kept as bytes.
     *
     * @return the supplied source bytes, or {@code null} if none were supplied
     *         for the current document
     */
    public BytesReference internalSourceRef() {
        return sourceAsBytes;
    }

    /**
     * Checks if the source has been deserialized as a {@link Map} of java objects.
     *
     * @return {@code true} if a parsed source map is currently held, either because
     *         it was parsed by {@code source()} or supplied via {@code setSource(Map)}
     */
    public boolean hasSourceAsMap() {
        return source != null;
    }

    /**
     * Returns the values associated with the path. Those are "low" level values, and it can
     * handle path expression where an array/list is navigated within.
     *
     * This method will:
     *
     *  - not cache source if it's not already parsed
     *  - will only extract the desired values from the compressed source instead of deserializing the whole object
     *
     * This is useful when the caller only wants a single value from source and does not care of source is fully parsed and cached
     * for later use.
     * @param path The path from which to extract the values from source
     * @return The list of found values or an empty list if none are found
     */
    public List