org.elasticsearch.index.mapper.TextFieldMapper Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.search.Query;
import org.elasticsearch.Version;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import static java.util.Collections.unmodifiableList;
import static org.elasticsearch.index.mapper.TypeParsers.parseTextField;
/** A {@link FieldMapper} for full-text fields. */
public class TextFieldMapper extends FieldMapper {
public static final String CONTENT_TYPE = "text";
private static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1;
private static final List SUPPORTED_PARAMETERS_FOR_AUTO_DOWNGRADE_TO_STRING = unmodifiableList(Arrays.asList(
"type",
// common text parameters, for which the upgrade is straightforward
"index", "store", "doc_values", "omit_norms", "norms", "boost", "fields", "copy_to",
"fielddata", "eager_global_ordinals", "fielddata_frequency_filter", "include_in_all",
"analyzer", "search_analyzer", "search_quote_analyzer",
"index_options", "position_increment_gap", "similarity"));
public static class Defaults {
public static double FIELDDATA_MIN_FREQUENCY = 0;
public static double FIELDDATA_MAX_FREQUENCY = Integer.MAX_VALUE;
public static int FIELDDATA_MIN_SEGMENT_SIZE = 0;
public static final MappedFieldType FIELD_TYPE = new TextFieldType();
static {
FIELD_TYPE.freeze();
}
/**
* The default position_increment_gap is set to 100 so that phrase
* queries of reasonably high slop will not match across field values.
*/
public static final int POSITION_INCREMENT_GAP = 100;
}
public static class Builder extends FieldMapper.Builder {
private int positionIncrementGap = POSITION_INCREMENT_GAP_USE_ANALYZER;
public Builder(String name) {
super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
builder = this;
}
@Override
public TextFieldType fieldType() {
return (TextFieldType) super.fieldType();
}
public Builder positionIncrementGap(int positionIncrementGap) {
if (positionIncrementGap < 0) {
throw new MapperParsingException("[positions_increment_gap] must be positive, got " + positionIncrementGap);
}
this.positionIncrementGap = positionIncrementGap;
return this;
}
public Builder fielddata(boolean fielddata) {
fieldType().setFielddata(fielddata);
return builder;
}
@Override
public Builder docValues(boolean docValues) {
if (docValues) {
throw new IllegalArgumentException("[text] fields do not support doc values");
}
return super.docValues(docValues);
}
public Builder eagerGlobalOrdinals(boolean eagerGlobalOrdinals) {
fieldType().setEagerGlobalOrdinals(eagerGlobalOrdinals);
return builder;
}
public Builder fielddataFrequencyFilter(double minFreq, double maxFreq, int minSegmentSize) {
fieldType().setFielddataMinFrequency(minFreq);
fieldType().setFielddataMaxFrequency(maxFreq);
fieldType().setFielddataMinSegmentSize(minSegmentSize);
return builder;
}
@Override
public TextFieldMapper build(BuilderContext context) {
if (positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) {
if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
throw new IllegalArgumentException("Cannot set position_increment_gap on field ["
+ name + "] without positions enabled");
}
fieldType.setIndexAnalyzer(new NamedAnalyzer(fieldType.indexAnalyzer(), positionIncrementGap));
fieldType.setSearchAnalyzer(new NamedAnalyzer(fieldType.searchAnalyzer(), positionIncrementGap));
fieldType.setSearchQuoteAnalyzer(new NamedAnalyzer(fieldType.searchQuoteAnalyzer(), positionIncrementGap));
}
setupFieldType(context);
return new TextFieldMapper(
name, fieldType, defaultFieldType, positionIncrementGap, includeInAll,
context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
}
}
public static class TypeParser implements Mapper.TypeParser {
@Override
public Mapper.Builder parse(String fieldName, Map node, ParserContext parserContext) throws MapperParsingException {
if (parserContext.indexVersionCreated().before(Version.V_5_0_0_alpha1)) {
// Downgrade "text" to "string" in indexes created in 2.x so you can use modern syntax against old indexes
Set unsupportedParameters = new HashSet<>(node.keySet());
unsupportedParameters.removeAll(SUPPORTED_PARAMETERS_FOR_AUTO_DOWNGRADE_TO_STRING);
if (false == SUPPORTED_PARAMETERS_FOR_AUTO_DOWNGRADE_TO_STRING.containsAll(node.keySet())) {
throw new IllegalArgumentException("Automatic downgrade from [text] to [string] failed because parameters "
+ unsupportedParameters + " are not supported for automatic downgrades.");
}
{ // Downgrade "index"
Object index = node.get("index");
if (index == null || Boolean.TRUE.equals(index)) {
index = "analyzed";
} else if (Boolean.FALSE.equals(index)) {
index = "no";
} else {
throw new IllegalArgumentException(
"Can't parse [index] value [" + index + "] for field [" + fieldName + "], expected [true] or [false]");
}
node.put("index", index);
}
{ // Downgrade "fielddata" (default in string is true, default in text is false)
Object fielddata = node.get("fielddata");
if (fielddata == null || Boolean.FALSE.equals(fielddata)) {
fielddata = false;
} else if (Boolean.TRUE.equals(fielddata)) {
fielddata = true;
} else {
throw new IllegalArgumentException("can't parse [fielddata] value for [" + fielddata + "] for field ["
+ fieldName + "], expected [true] or [false]");
}
node.put("fielddata", fielddata);
}
return new StringFieldMapper.TypeParser().parse(fieldName, node, parserContext);
}
TextFieldMapper.Builder builder = new TextFieldMapper.Builder(fieldName);
builder.fieldType().setIndexAnalyzer(parserContext.getIndexAnalyzers().getDefaultIndexAnalyzer());
builder.fieldType().setSearchAnalyzer(parserContext.getIndexAnalyzers().getDefaultSearchAnalyzer());
builder.fieldType().setSearchQuoteAnalyzer(parserContext.getIndexAnalyzers().getDefaultSearchQuoteAnalyzer());
parseTextField(builder, fieldName, node, parserContext);
for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) {
Map.Entry entry = iterator.next();
String propName = entry.getKey();
Object propNode = entry.getValue();
if (propName.equals("position_increment_gap")) {
int newPositionIncrementGap = XContentMapValues.nodeIntegerValue(propNode, -1);
builder.positionIncrementGap(newPositionIncrementGap);
iterator.remove();
} else if (propName.equals("fielddata")) {
builder.fielddata(XContentMapValues.nodeBooleanValue(propNode));
iterator.remove();
} else if (propName.equals("eager_global_ordinals")) {
builder.eagerGlobalOrdinals(XContentMapValues.nodeBooleanValue(propNode));
iterator.remove();
} else if (propName.equals("fielddata_frequency_filter")) {
Map frequencyFilter = (Map) propNode;
double minFrequency = XContentMapValues.nodeDoubleValue(frequencyFilter.remove("min"), 0);
double maxFrequency = XContentMapValues.nodeDoubleValue(frequencyFilter.remove("max"), Integer.MAX_VALUE);
int minSegmentSize = XContentMapValues.nodeIntegerValue(frequencyFilter.remove("min_segment_size"), 0);
builder.fielddataFrequencyFilter(minFrequency, maxFrequency, minSegmentSize);
DocumentMapperParser.checkNoRemainingFields(propName, frequencyFilter, parserContext.indexVersionCreated());
iterator.remove();
}
}
return builder;
}
}
public static final class TextFieldType extends StringFieldType {
private boolean fielddata;
private double fielddataMinFrequency;
private double fielddataMaxFrequency;
private int fielddataMinSegmentSize;
public TextFieldType() {
setTokenized(true);
fielddata = false;
fielddataMinFrequency = Defaults.FIELDDATA_MIN_FREQUENCY;
fielddataMaxFrequency = Defaults.FIELDDATA_MAX_FREQUENCY;
fielddataMinSegmentSize = Defaults.FIELDDATA_MIN_SEGMENT_SIZE;
}
protected TextFieldType(TextFieldType ref) {
super(ref);
this.fielddata = ref.fielddata;
this.fielddataMinFrequency = ref.fielddataMinFrequency;
this.fielddataMaxFrequency = ref.fielddataMaxFrequency;
this.fielddataMinSegmentSize = ref.fielddataMinSegmentSize;
}
public TextFieldType clone() {
return new TextFieldType(this);
}
@Override
public boolean equals(Object o) {
if (super.equals(o) == false) {
return false;
}
TextFieldType that = (TextFieldType) o;
return fielddata == that.fielddata
&& fielddataMinFrequency == that.fielddataMinFrequency
&& fielddataMaxFrequency == that.fielddataMaxFrequency
&& fielddataMinSegmentSize == that.fielddataMinSegmentSize;
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), fielddata,
fielddataMinFrequency, fielddataMaxFrequency, fielddataMinSegmentSize);
}
@Override
public void checkCompatibility(MappedFieldType other,
List conflicts, boolean strict) {
super.checkCompatibility(other, conflicts, strict);
TextFieldType otherType = (TextFieldType) other;
if (strict) {
if (fielddata() != otherType.fielddata()) {
conflicts.add("mapper [" + name() + "] is used by multiple types. Set update_all_types to true to update [fielddata] "
+ "across all types.");
}
if (fielddataMinFrequency() != otherType.fielddataMinFrequency()) {
conflicts.add("mapper [" + name() + "] is used by multiple types. Set update_all_types to true to update "
+ "[fielddata_frequency_filter.min] across all types.");
}
if (fielddataMaxFrequency() != otherType.fielddataMaxFrequency()) {
conflicts.add("mapper [" + name() + "] is used by multiple types. Set update_all_types to true to update "
+ "[fielddata_frequency_filter.max] across all types.");
}
if (fielddataMinSegmentSize() != otherType.fielddataMinSegmentSize()) {
conflicts.add("mapper [" + name() + "] is used by multiple types. Set update_all_types to true to update "
+ "[fielddata_frequency_filter.min_segment_size] across all types.");
}
}
}
public boolean fielddata() {
return fielddata;
}
public void setFielddata(boolean fielddata) {
checkIfFrozen();
this.fielddata = fielddata;
}
public double fielddataMinFrequency() {
return fielddataMinFrequency;
}
public void setFielddataMinFrequency(double fielddataMinFrequency) {
checkIfFrozen();
this.fielddataMinFrequency = fielddataMinFrequency;
}
public double fielddataMaxFrequency() {
return fielddataMaxFrequency;
}
public void setFielddataMaxFrequency(double fielddataMaxFrequency) {
checkIfFrozen();
this.fielddataMaxFrequency = fielddataMaxFrequency;
}
public int fielddataMinSegmentSize() {
return fielddataMinSegmentSize;
}
public void setFielddataMinSegmentSize(int fielddataMinSegmentSize) {
checkIfFrozen();
this.fielddataMinSegmentSize = fielddataMinSegmentSize;
}
@Override
public String typeName() {
return CONTENT_TYPE;
}
@Override
public Query nullValueQuery() {
if (nullValue() == null) {
return null;
}
return termQuery(nullValue(), null);
}
@Override
public IndexFieldData.Builder fielddataBuilder() {
if (fielddata == false) {
throw new IllegalArgumentException("Fielddata is disabled on text fields by default. Set fielddata=true on [" + name()
+ "] in order to load fielddata in memory by uninverting the inverted index. Note that this can however "
+ "use significant memory.");
}
return new PagedBytesIndexFieldData.Builder(fielddataMinFrequency, fielddataMaxFrequency, fielddataMinSegmentSize);
}
}
private Boolean includeInAll;
private int positionIncrementGap;
protected TextFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
int positionIncrementGap, Boolean includeInAll,
Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
assert fieldType.tokenized();
assert fieldType.hasDocValues() == false;
if (fieldType().indexOptions() == IndexOptions.NONE && fieldType().fielddata()) {
throw new IllegalArgumentException("Cannot enable fielddata on a [text] field that is not indexed: [" + name() + "]");
}
this.positionIncrementGap = positionIncrementGap;
this.includeInAll = includeInAll;
}
@Override
protected TextFieldMapper clone() {
return (TextFieldMapper) super.clone();
}
// pkg-private for testing
Boolean includeInAll() {
return includeInAll;
}
public int getPositionIncrementGap() {
return this.positionIncrementGap;
}
@Override
protected void parseCreateField(ParseContext context, List fields) throws IOException {
final String value;
if (context.externalValueSet()) {
value = context.externalValue().toString();
} else {
value = context.parser().textOrNull();
}
if (value == null) {
return;
}
if (context.includeInAll(includeInAll, this)) {
context.allEntries().addText(fieldType().name(), value, fieldType().boost());
}
if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
Field field = new Field(fieldType().name(), value, fieldType());
fields.add(field);
}
}
@Override
protected String contentType() {
return CONTENT_TYPE;
}
@Override
protected void doMerge(Mapper mergeWith, boolean updateAllTypes) {
super.doMerge(mergeWith, updateAllTypes);
this.includeInAll = ((TextFieldMapper) mergeWith).includeInAll;
}
@Override
public TextFieldType fieldType() {
return (TextFieldType) super.fieldType();
}
@Override
protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
super.doXContentBody(builder, includeDefaults, params);
doXContentAnalyzers(builder, includeDefaults);
if (includeInAll != null) {
builder.field("include_in_all", includeInAll);
} else if (includeDefaults) {
builder.field("include_in_all", true);
}
if (includeDefaults || positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) {
builder.field("position_increment_gap", positionIncrementGap);
}
if (includeDefaults || fieldType().fielddata() != ((TextFieldType) defaultFieldType).fielddata()) {
builder.field("fielddata", fieldType().fielddata());
}
if (fieldType().fielddata()) {
if (includeDefaults
|| fieldType().fielddataMinFrequency() != Defaults.FIELDDATA_MIN_FREQUENCY
|| fieldType().fielddataMaxFrequency() != Defaults.FIELDDATA_MAX_FREQUENCY
|| fieldType().fielddataMinSegmentSize() != Defaults.FIELDDATA_MIN_SEGMENT_SIZE) {
builder.startObject("fielddata_frequency_filter");
if (includeDefaults || fieldType().fielddataMinFrequency() != Defaults.FIELDDATA_MIN_FREQUENCY) {
builder.field("min", fieldType().fielddataMinFrequency());
}
if (includeDefaults || fieldType().fielddataMaxFrequency() != Defaults.FIELDDATA_MAX_FREQUENCY) {
builder.field("max", fieldType().fielddataMaxFrequency());
}
if (includeDefaults || fieldType().fielddataMinSegmentSize() != Defaults.FIELDDATA_MIN_SEGMENT_SIZE) {
builder.field("min_segment_size", fieldType().fielddataMinSegmentSize());
}
builder.endObject();
}
}
}
}