All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.index.mapper.CompletionFieldMapper Maven / Gradle / Ivy

There is a newer version: 8.15.1
Show newest version
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.index.mapper;

import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.suggest.document.Completion50PostingsFormat;
import org.apache.lucene.search.suggest.document.CompletionAnalyzer;
import org.apache.lucene.search.suggest.document.CompletionQuery;
import org.apache.lucene.search.suggest.document.FuzzyCompletionQuery;
import org.apache.lucene.search.suggest.document.PrefixCompletionQuery;
import org.apache.lucene.search.suggest.document.RegexCompletionQuery;
import org.apache.lucene.search.suggest.document.SuggestField;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.NumberType;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.search.suggest.completion.CompletionSuggester;
import org.elasticsearch.search.suggest.completion.context.ContextMapping;
import org.elasticsearch.search.suggest.completion.context.ContextMappings;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import static org.elasticsearch.index.mapper.TypeParsers.parseMultiField;

/**
 * Mapper for completion field. The field values are indexed as a weighted FST for
 * fast auto-completion/search-as-you-type functionality.
* * Type properties:
*
    *
  • "analyzer": "simple", (default)
  • *
  • "search_analyzer": "simple", (default)
  • *
  • "preserve_separators" : true, (default)
  • *
  • "preserve_position_increments" : true (default)
  • *
  • "min_input_length": 50 (default)
  • *
  • "contexts" : CONTEXTS
  • *
* see {@link ContextMappings#load(Object, Version)} for CONTEXTS
* see {@link #parse(ParseContext)} for acceptable inputs for indexing
*

* This field type constructs completion queries that are run * against the weighted FST index by the {@link CompletionSuggester}. * This field can also be extended to add search criteria to suggestions * for query-time filtering and boosting (see {@link ContextMappings} */ public class CompletionFieldMapper extends FieldMapper implements ArrayValueMapperParser { public static final String CONTENT_TYPE = "completion"; public static class Defaults { public static final MappedFieldType FIELD_TYPE = new CompletionFieldType(); static { FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.freeze(); } public static final boolean DEFAULT_PRESERVE_SEPARATORS = true; public static final boolean DEFAULT_POSITION_INCREMENTS = true; public static final int DEFAULT_MAX_INPUT_LENGTH = 50; } public static class Fields { // Mapping field names public static final ParseField ANALYZER = new ParseField("analyzer"); public static final ParseField SEARCH_ANALYZER = new ParseField("search_analyzer"); public static final ParseField PRESERVE_SEPARATORS = new ParseField("preserve_separators"); public static final ParseField PRESERVE_POSITION_INCREMENTS = new ParseField("preserve_position_increments"); public static final ParseField TYPE = new ParseField("type"); public static final ParseField CONTEXTS = new ParseField("contexts"); public static final ParseField MAX_INPUT_LENGTH = new ParseField("max_input_length", "max_input_len"); // Content field names public static final String CONTENT_FIELD_NAME_INPUT = "input"; public static final String CONTENT_FIELD_NAME_WEIGHT = "weight"; public static final String CONTENT_FIELD_NAME_CONTEXTS = "contexts"; } public static final Set ALLOWED_CONTENT_FIELD_NAMES = Sets.newHashSet(Fields.CONTENT_FIELD_NAME_INPUT, Fields.CONTENT_FIELD_NAME_WEIGHT, Fields.CONTENT_FIELD_NAME_CONTEXTS); public static class TypeParser implements Mapper.TypeParser { @Override public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { if (parserContext.indexVersionCreated().before(Version.V_5_0_0_alpha1)) { return new CompletionFieldMapper2x.TypeParser().parse(name, node, parserContext); } CompletionFieldMapper.Builder builder = new CompletionFieldMapper.Builder(name); NamedAnalyzer indexAnalyzer = null; NamedAnalyzer searchAnalyzer = null; for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { Map.Entry entry = iterator.next(); String fieldName = entry.getKey(); Object fieldNode = entry.getValue(); if (fieldName.equals("type")) { continue; } if (Fields.ANALYZER.match(fieldName)) { indexAnalyzer = getNamedAnalyzer(parserContext, fieldNode.toString()); iterator.remove(); } else if (Fields.SEARCH_ANALYZER.match(fieldName)) { searchAnalyzer = getNamedAnalyzer(parserContext, fieldNode.toString()); iterator.remove(); } else if (Fields.PRESERVE_SEPARATORS.match(fieldName)) { builder.preserveSeparators(Boolean.parseBoolean(fieldNode.toString())); iterator.remove(); } else if (Fields.PRESERVE_POSITION_INCREMENTS.match(fieldName)) { builder.preservePositionIncrements(Boolean.parseBoolean(fieldNode.toString())); iterator.remove(); } else if (Fields.MAX_INPUT_LENGTH.match(fieldName)) { builder.maxInputLength(Integer.parseInt(fieldNode.toString())); iterator.remove(); } else if (Fields.CONTEXTS.match(fieldName)) { builder.contextMappings(ContextMappings.load(fieldNode, parserContext.indexVersionCreated())); iterator.remove(); } else if (parseMultiField(builder, name, parserContext, fieldName, fieldNode)) { iterator.remove(); } } if (indexAnalyzer == null) { if (searchAnalyzer != null) { throw new MapperParsingException("analyzer on completion field [" + name + "] must be set when search_analyzer is set"); } indexAnalyzer = searchAnalyzer = parserContext.getIndexAnalyzers().get("simple"); } else if (searchAnalyzer == null) { searchAnalyzer = indexAnalyzer; } builder.indexAnalyzer(indexAnalyzer); builder.searchAnalyzer(searchAnalyzer); return builder; } private NamedAnalyzer getNamedAnalyzer(ParserContext parserContext, String name) { NamedAnalyzer analyzer = parserContext.getIndexAnalyzers().get(name); if (analyzer == null) { throw new IllegalArgumentException("Can't find default or mapped analyzer with name [" + name + "]"); } return analyzer; } } public static final class CompletionFieldType extends TermBasedFieldType { private static PostingsFormat postingsFormat; private boolean preserveSep = Defaults.DEFAULT_PRESERVE_SEPARATORS; private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS; private ContextMappings contextMappings = null; public CompletionFieldType() { } private CompletionFieldType(CompletionFieldType ref) { super(ref); this.contextMappings = ref.contextMappings; this.preserveSep = ref.preserveSep; this.preservePositionIncrements = ref.preservePositionIncrements; } public void setPreserveSep(boolean preserveSep) { checkIfFrozen(); this.preserveSep = preserveSep; } public void setPreservePositionIncrements(boolean preservePositionIncrements) { checkIfFrozen(); this.preservePositionIncrements = preservePositionIncrements; } public void setContextMappings(ContextMappings contextMappings) { checkIfFrozen(); this.contextMappings = contextMappings; } @Override public NamedAnalyzer indexAnalyzer() { final NamedAnalyzer indexAnalyzer = super.indexAnalyzer(); if (indexAnalyzer != null && !(indexAnalyzer.analyzer() instanceof CompletionAnalyzer)) { return new NamedAnalyzer(indexAnalyzer.name(), AnalyzerScope.INDEX, new CompletionAnalyzer(indexAnalyzer, preserveSep, preservePositionIncrements)); } return indexAnalyzer; } @Override public NamedAnalyzer searchAnalyzer() { final NamedAnalyzer searchAnalyzer = super.searchAnalyzer(); if (searchAnalyzer != null && !(searchAnalyzer.analyzer() instanceof CompletionAnalyzer)) { return new NamedAnalyzer(searchAnalyzer.name(), AnalyzerScope.INDEX, new CompletionAnalyzer(searchAnalyzer, preserveSep, preservePositionIncrements)); } return searchAnalyzer; } /** * @return true if there are one or more context mappings defined * for this field type */ public boolean hasContextMappings() { return contextMappings != null; } /** * @return associated context mappings for this field type */ public ContextMappings getContextMappings() { return contextMappings; } public boolean preserveSep() { return preserveSep; } public boolean preservePositionIncrements() { return preservePositionIncrements; } /** * @return postings format to use for this field-type */ public static synchronized PostingsFormat postingsFormat() { if (postingsFormat == null) { postingsFormat = new Completion50PostingsFormat(); } return postingsFormat; } /** * Completion prefix query */ public CompletionQuery prefixQuery(Object value) { return new PrefixCompletionQuery(searchAnalyzer().analyzer(), new Term(name(), indexedValueForSearch(value))); } /** * Completion prefix regular expression query */ public CompletionQuery regexpQuery(Object value, int flags, int maxDeterminizedStates) { return new RegexCompletionQuery(new Term(name(), indexedValueForSearch(value)), flags, maxDeterminizedStates); } /** * Completion prefix fuzzy query */ public CompletionQuery fuzzyQuery(String value, Fuzziness fuzziness, int nonFuzzyPrefixLength, int minFuzzyPrefixLength, int maxExpansions, boolean transpositions, boolean unicodeAware) { return new FuzzyCompletionQuery(searchAnalyzer().analyzer(), new Term(name(), indexedValueForSearch(value)), null, fuzziness.asDistance(), transpositions, nonFuzzyPrefixLength, minFuzzyPrefixLength, unicodeAware, maxExpansions); } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; if (!super.equals(o)) return false; CompletionFieldType that = (CompletionFieldType) o; if (preserveSep != that.preserveSep) return false; if (preservePositionIncrements != that.preservePositionIncrements) return false; return !(contextMappings != null ? !contextMappings.equals(that.contextMappings) : that.contextMappings != null); } @Override public int hashCode() { return Objects.hash(super.hashCode(), preserveSep, preservePositionIncrements, contextMappings); } @Override public CompletionFieldType clone() { return new CompletionFieldType(this); } @Override public String typeName() { return CONTENT_TYPE; } @Override public void checkCompatibility(MappedFieldType fieldType, List conflicts, boolean strict) { super.checkCompatibility(fieldType, conflicts, strict); CompletionFieldType other = (CompletionFieldType)fieldType; if (preservePositionIncrements != other.preservePositionIncrements) { conflicts.add("mapper [" + name() + "] has different [preserve_position_increments] values"); } if (preserveSep != other.preserveSep) { conflicts.add("mapper [" + name() + "] has different [preserve_separators] values"); } if (hasContextMappings() != other.hasContextMappings()) { conflicts.add("mapper [" + name() + "] has different [context_mappings] values"); } else if (hasContextMappings() && contextMappings.equals(other.contextMappings) == false) { conflicts.add("mapper [" + name() + "] has different [context_mappings] values"); } } } /** * Builder for {@link CompletionFieldMapper} */ public static class Builder extends FieldMapper.Builder { private int maxInputLength = Defaults.DEFAULT_MAX_INPUT_LENGTH; private ContextMappings contextMappings = null; private boolean preserveSeparators = Defaults.DEFAULT_PRESERVE_SEPARATORS; private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS; /** * @param name of the completion field to build */ public Builder(String name) { super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE); builder = this; } /** * @param maxInputLength maximum expected prefix length * NOTE: prefixes longer than this will * be truncated */ public Builder maxInputLength(int maxInputLength) { if (maxInputLength <= 0) { throw new IllegalArgumentException(Fields.MAX_INPUT_LENGTH.getPreferredName() + " must be > 0 but was [" + maxInputLength + "]"); } this.maxInputLength = maxInputLength; return this; } /** * Add context mapping to this field * @param contextMappings see {@link ContextMappings#load(Object, Version)} */ public Builder contextMappings(ContextMappings contextMappings) { this.contextMappings = contextMappings; return this; } public Builder preserveSeparators(boolean preserveSeparators) { this.preserveSeparators = preserveSeparators; return this; } public Builder preservePositionIncrements(boolean preservePositionIncrements) { this.preservePositionIncrements = preservePositionIncrements; return this; } @Override public CompletionFieldMapper build(BuilderContext context) { setupFieldType(context); CompletionFieldType completionFieldType = (CompletionFieldType) this.fieldType; completionFieldType.setContextMappings(contextMappings); completionFieldType.setPreservePositionIncrements(preservePositionIncrements); completionFieldType.setPreserveSep(preserveSeparators); return new CompletionFieldMapper(name, this.fieldType, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo, maxInputLength); } } private int maxInputLength; public CompletionFieldMapper(String simpleName, MappedFieldType fieldType, Settings indexSettings, MultiFields multiFields, CopyTo copyTo, int maxInputLength) { super(simpleName, fieldType, Defaults.FIELD_TYPE, indexSettings, multiFields, copyTo); this.maxInputLength = maxInputLength; } @Override public CompletionFieldType fieldType() { return (CompletionFieldType) super.fieldType(); } /** * Parses and indexes inputs * * Parsing: * Acceptable format: * "STRING" - interpreted as field value (input) * "ARRAY" - each element can be one of "OBJECT" (see below) * "OBJECT" - { "input": STRING|ARRAY, "weight": STRING|INT, "contexts": ARRAY|OBJECT } * * Indexing: * if context mappings are defined, delegates to {@link ContextMappings#addField(ParseContext.Document, String, String, int, Map)} * else adds inputs as a {@link org.apache.lucene.search.suggest.document.SuggestField} */ @Override public Mapper parse(ParseContext context) throws IOException { // parse XContentParser parser = context.parser(); Token token = parser.currentToken(); Map inputMap = new HashMap<>(1); if (token == Token.VALUE_NULL) { throw new MapperParsingException("completion field [" + fieldType().name() + "] does not support null values"); } else if (token == Token.START_ARRAY) { while ((token = parser.nextToken()) != Token.END_ARRAY) { parse(context, token, parser, inputMap); } } else { parse(context, token, parser, inputMap); } // index for (Map.Entry completionInput : inputMap.entrySet()) { String input = completionInput.getKey(); // truncate input if (input.length() > maxInputLength) { int len = Math.min(maxInputLength, input.length()); if (Character.isHighSurrogate(input.charAt(len - 1))) { assert input.length() >= len + 1 && Character.isLowSurrogate(input.charAt(len)); len += 1; } input = input.substring(0, len); } CompletionInputMetaData metaData = completionInput.getValue(); if (fieldType().hasContextMappings()) { fieldType().getContextMappings().addField(context.doc(), fieldType().name(), input, metaData.weight, metaData.contexts); } else { context.doc().add(new SuggestField(fieldType().name(), input, metaData.weight)); } } multiFields.parse(this, context); return null; } /** * Acceptable inputs: * "STRING" - interpreted as the field value (input) * "OBJECT" - { "input": STRING|ARRAY, "weight": STRING|INT, "contexts": ARRAY|OBJECT } */ private void parse(ParseContext parseContext, Token token, XContentParser parser, Map inputMap) throws IOException { String currentFieldName = null; if (token == Token.VALUE_STRING) { inputMap.put(parser.text(), new CompletionInputMetaData(Collections.>emptyMap(), 1)); } else if (token == Token.START_OBJECT) { Set inputs = new HashSet<>(); int weight = 1; Map> contextsMap = new HashMap<>(); while ((token = parser.nextToken()) != Token.END_OBJECT) { if (token == Token.FIELD_NAME) { currentFieldName = parser.currentName(); if (!ALLOWED_CONTENT_FIELD_NAMES.contains(currentFieldName)) { throw new IllegalArgumentException("unknown field name [" + currentFieldName + "], must be one of " + ALLOWED_CONTENT_FIELD_NAMES); } } else if (currentFieldName != null) { if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) { if (token == Token.VALUE_STRING) { inputs.add(parser.text()); } else if (token == Token.START_ARRAY) { while ((token = parser.nextToken()) != Token.END_ARRAY) { if (token == Token.VALUE_STRING) { inputs.add(parser.text()); } else { throw new IllegalArgumentException("input array must have string values, but was [" + token.name() + "]"); } } } else { throw new IllegalArgumentException("input must be a string or array, but was [" + token.name() + "]"); } } else if (Fields.CONTENT_FIELD_NAME_WEIGHT.equals(currentFieldName)) { final Number weightValue; if (token == Token.VALUE_STRING) { try { weightValue = Long.parseLong(parser.text()); } catch (NumberFormatException e) { throw new IllegalArgumentException("weight must be an integer, but was [" + parser.text() + "]"); } } else if (token == Token.VALUE_NUMBER) { NumberType numberType = parser.numberType(); if (NumberType.LONG != numberType && NumberType.INT != numberType) { throw new IllegalArgumentException("weight must be an integer, but was [" + parser.numberValue() + "]"); } weightValue = parser.numberValue(); } else { throw new IllegalArgumentException("weight must be a number or string, but was [" + token.name() + "]"); } if (weightValue.longValue() < 0 || weightValue.longValue() > Integer.MAX_VALUE) { // always parse a long to make sure we don't get overflow throw new IllegalArgumentException("weight must be in the interval [0..2147483647], but was [" + weightValue.longValue() + "]"); } weight = weightValue.intValue(); } else if (Fields.CONTENT_FIELD_NAME_CONTEXTS.equals(currentFieldName)) { if (fieldType().hasContextMappings() == false) { throw new IllegalArgumentException("contexts field is not supported for field: [" + fieldType().name() + "]"); } ContextMappings contextMappings = fieldType().getContextMappings(); XContentParser.Token currentToken = parser.currentToken(); if (currentToken == XContentParser.Token.START_OBJECT) { ContextMapping contextMapping = null; String fieldName = null; while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (currentToken == XContentParser.Token.FIELD_NAME) { fieldName = parser.currentName(); contextMapping = contextMappings.get(fieldName); } else if (currentToken == XContentParser.Token.VALUE_STRING || currentToken == XContentParser.Token.START_ARRAY || currentToken == XContentParser.Token.START_OBJECT) { assert fieldName != null; assert !contextsMap.containsKey(fieldName); contextsMap.put(fieldName, contextMapping.parseContext(parseContext, parser)); } else { throw new IllegalArgumentException("contexts must be an object or an array , but was [" + currentToken + "]"); } } } else { throw new IllegalArgumentException("contexts must be an object or an array , but was [" + currentToken + "]"); } } } } for (String input : inputs) { if (inputMap.containsKey(input) == false || inputMap.get(input).weight < weight) { inputMap.put(input, new CompletionInputMetaData(contextsMap, weight)); } } } else { throw new ElasticsearchParseException("failed to parse expected text or object got" + token.name()); } } static class CompletionInputMetaData { public final Map> contexts; public final int weight; CompletionInputMetaData(Map> contexts, int weight) { this.contexts = contexts; this.weight = weight; } } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(simpleName()) .field(Fields.TYPE.getPreferredName(), CONTENT_TYPE); builder.field(Fields.ANALYZER.getPreferredName(), fieldType().indexAnalyzer().name()); if (fieldType().indexAnalyzer().name().equals(fieldType().searchAnalyzer().name()) == false) { builder.field(Fields.SEARCH_ANALYZER.getPreferredName(), fieldType().searchAnalyzer().name()); } builder.field(Fields.PRESERVE_SEPARATORS.getPreferredName(), fieldType().preserveSep()); builder.field(Fields.PRESERVE_POSITION_INCREMENTS.getPreferredName(), fieldType().preservePositionIncrements()); builder.field(Fields.MAX_INPUT_LENGTH.getPreferredName(), this.maxInputLength); if (fieldType().hasContextMappings()) { builder.startArray(Fields.CONTEXTS.getPreferredName()); fieldType().getContextMappings().toXContent(builder, params); builder.endArray(); } multiFields.toXContent(builder, params); return builder.endObject(); } @Override protected void parseCreateField(ParseContext context, List fields) throws IOException { // no-op } @Override protected String contentType() { return CONTENT_TYPE; } @Override protected void doMerge(Mapper mergeWith, boolean updateAllTypes) { super.doMerge(mergeWith, updateAllTypes); CompletionFieldMapper fieldMergeWith = (CompletionFieldMapper) mergeWith; this.maxInputLength = fieldMergeWith.maxInputLength; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy