
org.elasticsearch.index.mapper.flattened.FlattenedFieldParser Maven / Gradle / Ivy
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.index.mapper.flattened;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.xcontent.XContentParserUtils;
import org.elasticsearch.index.mapper.ContentPath;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.xcontent.XContentParser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* A helper class for {@link FlattenedFieldMapper} that parses a JSON object
* and produces a pair of indexable fields for each leaf value.
*/
class FlattenedFieldParser {
    /** Reserved character placed between the key and the value when building a keyed value. */
    static final String SEPARATOR = "\0";
    /** Byte form of {@link #SEPARATOR}, used when scanning already-encoded keyed values. */
    private static final byte SEPARATOR_BYTE = '\0';

    /** Name of the field that receives the plain leaf values. */
    private final String rootFieldName;
    /** Name of the field that receives the {@code key + SEPARATOR + value} encoded values. */
    private final String keyedFieldName;
    /** Consulted for {@code isSearchable()} and {@code hasDocValues()} to decide which fields to emit. */
    private final MappedFieldType fieldType;
    /** Limit checked by {@link #validateDepthLimit(ContentPath)} each time a nested object is entered. */
    private final int depthLimit;
    /** Values whose {@code String.length()} exceeds this are silently skipped. */
    private final int ignoreAbove;
    /** Substitute used for explicit null values; when {@code null}, null values are skipped. */
    private final String nullValue;

    /**
     * Creates a parser for one flattened field.
     *
     * @param rootFieldName  name of the field receiving plain values
     * @param keyedFieldName name of the field receiving keyed values
     * @param fieldType      field type queried for searchability and doc values
     * @param depthLimit     maximum allowed object nesting depth
     * @param ignoreAbove    maximum value length (in chars) that is still indexed
     * @param nullValue      replacement for null values, or {@code null} to skip them
     */
    FlattenedFieldParser(
        String rootFieldName,
        String keyedFieldName,
        MappedFieldType fieldType,
        int depthLimit,
        int ignoreAbove,
        String nullValue
    ) {
        this.rootFieldName = rootFieldName;
        this.keyedFieldName = keyedFieldName;
        this.fieldType = fieldType;
        this.depthLimit = depthLimit;
        this.ignoreAbove = ignoreAbove;
        this.nullValue = nullValue;
    }

    /**
     * Parses the object the parser is currently positioned on and collects the fields
     * generated for each of its leaf values.
     *
     * @param parser parser whose current token is checked to be {@code START_OBJECT}
     * @return the generated fields, in the order the leaf values were encountered
     * @throws IOException if reading from the parser fails
     * @throws IllegalArgumentException if a key contains the reserved separator, the depth
     *         limit is exceeded, or a keyed value is longer than {@link IndexWriter#MAX_TERM_LENGTH}
     */
    public List<IndexableField> parse(XContentParser parser) throws IOException {
        XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser);

        ContentPath path = new ContentPath();
        List<IndexableField> fields = new ArrayList<>();
        parseObject(parser, path, fields);
        return fields;
    }

    /**
     * Consumes tokens until the matching {@code END_OBJECT}, remembering the most recent
     * field name and handing every other token to {@link #parseFieldValue}.
     */
    private void parseObject(XContentParser parser, ContentPath path, List<IndexableField> fields) throws IOException {
        String currentName = null;
        for (XContentParser.Token token = parser.nextToken(); token != XContentParser.Token.END_OBJECT; token = parser.nextToken()) {
            if (token == XContentParser.Token.FIELD_NAME) {
                currentName = parser.currentName();
            } else {
                // A value token inside an object is always preceded by its field name.
                assert currentName != null;
                parseFieldValue(token, parser, path, currentName, fields);
            }
        }
    }

    /**
     * Consumes tokens until the matching {@code END_ARRAY}. Every element is treated as a
     * value of {@code currentName}; the array itself adds nothing to the path.
     */
    private void parseArray(XContentParser parser, ContentPath path, String currentName, List<IndexableField> fields)
        throws IOException {
        for (XContentParser.Token token = parser.nextToken(); token != XContentParser.Token.END_ARRAY; token = parser.nextToken()) {
            parseFieldValue(token, parser, path, currentName, fields);
        }
    }

    /**
     * Dispatches on the token type: recurses into objects and arrays, emits fields for
     * values, and applies {@link #nullValue} (if configured) for explicit nulls.
     *
     * @throws IllegalArgumentException if the token is none of the handled kinds
     */
    private void parseFieldValue(
        XContentParser.Token token,
        XContentParser parser,
        ContentPath path,
        String currentName,
        List<IndexableField> fields
    ) throws IOException {
        if (token == XContentParser.Token.START_OBJECT) {
            // The object's name becomes part of the key prefix for everything nested inside it.
            path.add(currentName);
            validateDepthLimit(path);
            parseObject(parser, path, fields);
            path.remove();
        } else if (token == XContentParser.Token.START_ARRAY) {
            parseArray(parser, path, currentName, fields);
        } else if (token.isValue()) {
            // Every value is indexed through its textual form.
            String value = parser.text();
            addField(path, currentName, value, fields);
        } else if (token == XContentParser.Token.VALUE_NULL) {
            if (nullValue != null) {
                addField(path, currentName, nullValue, fields);
            }
        } else {
            // Note that we throw an exception here just to be safe. We don't actually expect to reach
            // this case, since XContentParser verifies that the input is well-formed as it parses.
            throw new IllegalArgumentException("Encountered unexpected token [" + token.toString() + "].");
        }
    }

    /**
     * Emits the fields for a single leaf value: the plain value under {@link #rootFieldName}
     * and the keyed value under {@link #keyedFieldName}, each as a {@link StringField} when the
     * field type is searchable and as a {@link SortedSetDocValuesField} when it has doc values.
     * Values longer than {@link #ignoreAbove} are skipped before any other check runs.
     *
     * @throws IllegalArgumentException if the key contains the separator character or the
     *         UTF-8 encoded keyed value exceeds {@link IndexWriter#MAX_TERM_LENGTH} bytes
     */
    private void addField(ContentPath path, String currentName, String value, List<IndexableField> fields) {
        if (value.length() > ignoreAbove) {
            return;
        }

        String key = path.pathAsText(currentName);
        if (key.contains(SEPARATOR)) {
            // A separator inside the key would make the keyed encoding ambiguous.
            throw new IllegalArgumentException(
                "Keys in [flattened] fields cannot contain the reserved character \\0." + " Offending key: [" + key + "]."
            );
        }

        String keyedValue = createKeyedValue(key, value);
        BytesRef bytesKeyedValue = new BytesRef(keyedValue);
        // Check that the keyed value doesn't exceed the IndexWriter.MAX_TERM_LENGTH limit enforced by Lucene at index time.
        // Failing here lets us throw a more user-friendly exception that includes the offending field's key and value lengths.
        if (bytesKeyedValue.length > IndexWriter.MAX_TERM_LENGTH) {
            String msg = "Flattened field ["
                + rootFieldName
                + "] contains one immense field"
                + " whose keyed encoding is longer than the allowed max length of "
                + IndexWriter.MAX_TERM_LENGTH
                + " bytes. Key length: "
                + key.length()
                + ", value length: "
                + value.length()
                + " for key starting with ["
                + key.substring(0, Math.min(key.length(), 50))
                + "]";
            throw new IllegalArgumentException(msg);
        }

        BytesRef bytesValue = new BytesRef(value);
        if (fieldType.isSearchable()) {
            fields.add(new StringField(rootFieldName, bytesValue, Field.Store.NO));
            fields.add(new StringField(keyedFieldName, bytesKeyedValue, Field.Store.NO));
        }
        if (fieldType.hasDocValues()) {
            fields.add(new SortedSetDocValuesField(rootFieldName, bytesValue));
            fields.add(new SortedSetDocValuesField(keyedFieldName, bytesKeyedValue));
        }
    }

    /**
     * Rejects the document when {@code path.length() + 1} is greater than {@link #depthLimit}.
     *
     * @throws IllegalArgumentException if the limit is exceeded
     */
    private void validateDepthLimit(ContentPath path) {
        if (path.length() + 1 > depthLimit) {
            throw new IllegalArgumentException(
                "The provided [flattened] field [" + rootFieldName + "]" + " exceeds the maximum depth limit of [" + depthLimit + "]."
            );
        }
    }

    /**
     * Builds the keyed encoding {@code key + SEPARATOR + value}.
     *
     * @param key   the key; expected not to contain {@link #SEPARATOR}
     * @param value the leaf value
     * @return the concatenated keyed value
     */
    static String createKeyedValue(String key, String value) {
        return key + SEPARATOR + value;
    }

    /**
     * Returns the key portion of a keyed value: the bytes before the first separator byte.
     * If no separator byte is present, the whole input range is returned. The result shares
     * the backing array of {@code keyedValue}.
     *
     * @param keyedValue bytes of a keyed value
     * @return a view over the key bytes
     */
    static BytesRef extractKey(BytesRef keyedValue) {
        return new BytesRef(keyedValue.bytes, keyedValue.offset, keyLength(keyedValue));
    }

    /**
     * Returns the value portion of a keyed value: the bytes after the first separator byte.
     * The result shares the backing array of {@code keyedValue}. The input is expected to
     * contain a separator byte; without one the computed length is negative.
     *
     * @param keyedValue bytes of a keyed value
     * @return a view over the value bytes
     */
    static BytesRef extractValue(BytesRef keyedValue) {
        int keyLength = keyLength(keyedValue);

        // The start is an absolute index into the backing array and therefore includes the offset.
        // The remaining length is relative to the slice, so the offset must not be subtracted from it.
        int valueStart = keyedValue.offset + keyLength + 1;
        int valueLength = keyedValue.length - keyLength - 1;
        return new BytesRef(keyedValue.bytes, valueStart, valueLength);
    }

    /**
     * Counts the bytes preceding the first separator byte within the range described by
     * {@code keyedValue}, or returns {@code keyedValue.length} when there is no separator.
     */
    private static int keyLength(BytesRef keyedValue) {
        int length = 0;
        while (length < keyedValue.length && keyedValue.bytes[keyedValue.offset + length] != SEPARATOR_BYTE) {
            length++;
        }
        return length;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy