All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.index.mapper.flattened.FlattenedFieldParser Maven / Gradle / Ivy

There is a newer version: 8.14.0
Show newest version
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.index.mapper.flattened;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.xcontent.XContentParserUtils;
import org.elasticsearch.index.mapper.ContentPath;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.xcontent.XContentParser;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * A helper class for {@link FlattenedFieldMapper} parses a JSON object
 * and produces a pair of indexable fields for each leaf value.
 */
class FlattenedFieldParser {
    static final String SEPARATOR = "\0";
    private static final byte SEPARATOR_BYTE = '\0';

    private final String rootFieldName;
    private final String keyedFieldName;

    private final MappedFieldType fieldType;
    private final int depthLimit;
    private final int ignoreAbove;
    private final String nullValue;

    FlattenedFieldParser(
        String rootFieldName,
        String keyedFieldName,
        MappedFieldType fieldType,
        int depthLimit,
        int ignoreAbove,
        String nullValue
    ) {
        this.rootFieldName = rootFieldName;
        this.keyedFieldName = keyedFieldName;
        this.fieldType = fieldType;
        this.depthLimit = depthLimit;
        this.ignoreAbove = ignoreAbove;
        this.nullValue = nullValue;
    }

    public List parse(XContentParser parser) throws IOException {
        XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser);

        ContentPath path = new ContentPath();
        List fields = new ArrayList<>();

        parseObject(parser, path, fields);
        return fields;
    }

    private void parseObject(XContentParser parser, ContentPath path, List fields) throws IOException {
        String currentName = null;
        while (true) {
            XContentParser.Token token = parser.nextToken();
            if (token == XContentParser.Token.END_OBJECT) {
                return;
            }

            if (token == XContentParser.Token.FIELD_NAME) {
                currentName = parser.currentName();
            } else {
                assert currentName != null;
                parseFieldValue(token, parser, path, currentName, fields);
            }
        }
    }

    private void parseArray(XContentParser parser, ContentPath path, String currentName, List fields) throws IOException {
        while (true) {
            XContentParser.Token token = parser.nextToken();
            if (token == XContentParser.Token.END_ARRAY) {
                return;
            }
            parseFieldValue(token, parser, path, currentName, fields);
        }
    }

    private void parseFieldValue(
        XContentParser.Token token,
        XContentParser parser,
        ContentPath path,
        String currentName,
        List fields
    ) throws IOException {
        if (token == XContentParser.Token.START_OBJECT) {
            path.add(currentName);
            validateDepthLimit(path);
            parseObject(parser, path, fields);
            path.remove();
        } else if (token == XContentParser.Token.START_ARRAY) {
            parseArray(parser, path, currentName, fields);
        } else if (token.isValue()) {
            String value = parser.text();
            addField(path, currentName, value, fields);
        } else if (token == XContentParser.Token.VALUE_NULL) {
            if (nullValue != null) {
                addField(path, currentName, nullValue, fields);
            }
        } else {
            // Note that we throw an exception here just to be safe. We don't actually expect to reach
            // this case, since XContentParser verifies that the input is well-formed as it parses.
            throw new IllegalArgumentException("Encountered unexpected token [" + token.toString() + "].");
        }
    }

    private void addField(ContentPath path, String currentName, String value, List fields) {
        if (value.length() > ignoreAbove) {
            return;
        }

        String key = path.pathAsText(currentName);
        if (key.contains(SEPARATOR)) {
            throw new IllegalArgumentException(
                "Keys in [flattened] fields cannot contain the reserved character \\0." + " Offending key: [" + key + "]."
            );
        }
        String keyedValue = createKeyedValue(key, value);
        BytesRef bytesKeyedValue = new BytesRef(keyedValue);
        // check the keyed value doesn't exceed the IndexWriter.MAX_TERM_LENGTH limit enforced by Lucene at index time
        // in that case we can already throw a more user friendly exception here which includes the offending fields key and value lengths
        if (bytesKeyedValue.length > IndexWriter.MAX_TERM_LENGTH) {
            String msg = "Flattened field ["
                + rootFieldName
                + "] contains one immense field"
                + " whose keyed encoding is longer than the allowed max length of "
                + IndexWriter.MAX_TERM_LENGTH
                + " bytes. Key length: "
                + key.length()
                + ", value length: "
                + value.length()
                + " for key starting with ["
                + key.substring(0, Math.min(key.length(), 50))
                + "]";
            throw new IllegalArgumentException(msg);
        }
        BytesRef bytesValue = new BytesRef(value);
        if (fieldType.isSearchable()) {
            fields.add(new StringField(rootFieldName, bytesValue, Field.Store.NO));
            fields.add(new StringField(keyedFieldName, bytesKeyedValue, Field.Store.NO));
        }

        if (fieldType.hasDocValues()) {
            fields.add(new SortedSetDocValuesField(rootFieldName, bytesValue));
            fields.add(new SortedSetDocValuesField(keyedFieldName, bytesKeyedValue));
        }
    }

    private void validateDepthLimit(ContentPath path) {
        if (path.length() + 1 > depthLimit) {
            throw new IllegalArgumentException(
                "The provided [flattened] field [" + rootFieldName + "]" + " exceeds the maximum depth limit of [" + depthLimit + "]."
            );
        }
    }

    static String createKeyedValue(String key, String value) {
        return key + SEPARATOR + value;
    }

    static BytesRef extractKey(BytesRef keyedValue) {
        int length;
        for (length = 0; length < keyedValue.length; length++) {
            if (keyedValue.bytes[keyedValue.offset + length] == SEPARATOR_BYTE) {
                break;
            }
        }
        return new BytesRef(keyedValue.bytes, keyedValue.offset, length);
    }

    static BytesRef extractValue(BytesRef keyedValue) {
        int length;
        for (length = 0; length < keyedValue.length; length++) {
            if (keyedValue.bytes[keyedValue.offset + length] == SEPARATOR_BYTE) {
                break;
            }
        }
        int valueStart = keyedValue.offset + length + 1;
        return new BytesRef(keyedValue.bytes, valueStart, keyedValue.length - valueStart);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy