Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper.core;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.NumberType;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider;
import org.elasticsearch.search.suggest.completion.CompletionPostingsFormatProvider;
import org.elasticsearch.search.suggest.completion.CompletionTokenStream;
import java.io.IOException;
import java.io.Reader;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import static org.elasticsearch.index.mapper.MapperBuilders.completionField;
/**
*
*/
public class CompletionFieldMapper extends AbstractFieldMapper {
public static final String CONTENT_TYPE = "completion";
public static class Defaults extends AbstractFieldMapper.Defaults {
public static final FieldType FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE);
static {
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.freeze();
}
public static final boolean DEFAULT_PRESERVE_SEPARATORS = true;
public static final boolean DEFAULT_POSITION_INCREMENTS = true;
public static final boolean DEFAULT_HAS_PAYLOADS = false;
public static final int DEFAULT_MAX_INPUT_LENGTH = 50;
}
public static class Fields {
// Mapping field names
public static final String ANALYZER = "analyzer";
public static final ParseField INDEX_ANALYZER = new ParseField("index_analyzer");
public static final ParseField SEARCH_ANALYZER = new ParseField("search_analyzer");
public static final ParseField PRESERVE_SEPARATORS = new ParseField("preserve_separators");
public static final ParseField PRESERVE_POSITION_INCREMENTS = new ParseField("preserve_position_increments");
public static final String PAYLOADS = "payloads";
public static final String TYPE = "type";
public static final ParseField MAX_INPUT_LENGTH = new ParseField("max_input_length", "max_input_len");
// Content field names
public static final String CONTENT_FIELD_NAME_INPUT = "input";
public static final String CONTENT_FIELD_NAME_OUTPUT = "output";
public static final String CONTENT_FIELD_NAME_PAYLOAD = "payload";
public static final String CONTENT_FIELD_NAME_WEIGHT = "weight";
}
public static Set ALLOWED_CONTENT_FIELD_NAMES = Sets.newHashSet(Fields.CONTENT_FIELD_NAME_INPUT,
Fields.CONTENT_FIELD_NAME_OUTPUT, Fields.CONTENT_FIELD_NAME_PAYLOAD, Fields.CONTENT_FIELD_NAME_WEIGHT);
public static class Builder extends AbstractFieldMapper.Builder {
private boolean preserveSeparators = Defaults.DEFAULT_PRESERVE_SEPARATORS;
private boolean payloads = Defaults.DEFAULT_HAS_PAYLOADS;
private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS;
private int maxInputLength = Defaults.DEFAULT_MAX_INPUT_LENGTH;
public Builder(String name) {
super(name, new FieldType(Defaults.FIELD_TYPE));
builder = this;
}
public Builder payloads(boolean payloads) {
this.payloads = payloads;
return this;
}
public Builder preserveSeparators(boolean preserveSeparators) {
this.preserveSeparators = preserveSeparators;
return this;
}
public Builder preservePositionIncrements(boolean preservePositionIncrements) {
this.preservePositionIncrements = preservePositionIncrements;
return this;
}
public Builder maxInputLength(int maxInputLength) {
if (maxInputLength <= 0) {
throw new ElasticsearchIllegalArgumentException(Fields.MAX_INPUT_LENGTH.getPreferredName() + " must be > 0 but was [" + maxInputLength + "]");
}
this.maxInputLength = maxInputLength;
return this;
}
@Override
public CompletionFieldMapper build(Mapper.BuilderContext context) {
return new CompletionFieldMapper(buildNames(context), indexAnalyzer, searchAnalyzer, postingsProvider, similarity, payloads,
preserveSeparators, preservePositionIncrements, maxInputLength, multiFieldsBuilder.build(this, context));
}
}
public static class TypeParser implements Mapper.TypeParser {
@Override
public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException {
CompletionFieldMapper.Builder builder = completionField(name);
for (Map.Entry entry : node.entrySet()) {
String fieldName = entry.getKey();
Object fieldNode = entry.getValue();
if (fieldName.equals("type")) {
continue;
}
if (fieldName.equals("analyzer")) {
NamedAnalyzer analyzer = getNamedAnalyzer(parserContext, fieldNode.toString());
builder.indexAnalyzer(analyzer);
builder.searchAnalyzer(analyzer);
} else if (Fields.INDEX_ANALYZER.match(fieldName)) {
builder.indexAnalyzer(getNamedAnalyzer(parserContext, fieldNode.toString()));
} else if (Fields.SEARCH_ANALYZER.match(fieldName)) {
builder.searchAnalyzer(getNamedAnalyzer(parserContext, fieldNode.toString()));
} else if (fieldName.equals(Fields.PAYLOADS)) {
builder.payloads(Boolean.parseBoolean(fieldNode.toString()));
} else if (Fields.PRESERVE_SEPARATORS.match(fieldName)) {
builder.preserveSeparators(Boolean.parseBoolean(fieldNode.toString()));
} else if (Fields.PRESERVE_POSITION_INCREMENTS.match(fieldName)) {
builder.preservePositionIncrements(Boolean.parseBoolean(fieldNode.toString()));
} else if (Fields.MAX_INPUT_LENGTH.match(fieldName)) {
builder.maxInputLength(Integer.parseInt(fieldNode.toString()));
} else {
throw new MapperParsingException("Unknown field [" + fieldName + "]");
}
}
if (builder.searchAnalyzer == null) {
builder.searchAnalyzer(parserContext.analysisService().analyzer("simple"));
}
if (builder.indexAnalyzer == null) {
builder.indexAnalyzer(parserContext.analysisService().analyzer("simple"));
}
// we are just using this as the default to be wrapped by the CompletionPostingsFormatProvider in the SuggesteFieldMapper ctor
builder.postingsFormat(parserContext.postingFormatService().get("default"));
return builder;
}
private NamedAnalyzer getNamedAnalyzer(ParserContext parserContext, String name) {
NamedAnalyzer analyzer = parserContext.analysisService().analyzer(name);
if (analyzer == null) {
throw new ElasticsearchIllegalArgumentException("Can't find default or mapped analyzer with name [" + name + "]");
}
return analyzer;
}
}
private static final BytesRef EMPTY = new BytesRef();
private final CompletionPostingsFormatProvider completionPostingsFormatProvider;
private final AnalyzingCompletionLookupProvider analyzingSuggestLookupProvider;
private final boolean payloads;
private final boolean preservePositionIncrements;
private final boolean preserveSeparators;
private int maxInputLength;
public CompletionFieldMapper(Names names, NamedAnalyzer indexAnalyzer, NamedAnalyzer searchAnalyzer, PostingsFormatProvider postingsProvider, SimilarityProvider similarity, boolean payloads,
boolean preserveSeparators, boolean preservePositionIncrements, int maxInputLength, MultiFields multiFields) {
super(names, 1.0f, Defaults.FIELD_TYPE, null, indexAnalyzer, searchAnalyzer, postingsProvider, null, similarity, null, null, null, multiFields);
analyzingSuggestLookupProvider = new AnalyzingCompletionLookupProvider(preserveSeparators, false, preservePositionIncrements, payloads);
this.completionPostingsFormatProvider = new CompletionPostingsFormatProvider("completion", postingsProvider, analyzingSuggestLookupProvider);
this.preserveSeparators = preserveSeparators;
this.payloads = payloads;
this.preservePositionIncrements = preservePositionIncrements;
this.maxInputLength = maxInputLength;
}
@Override
public PostingsFormatProvider postingsFormatProvider() {
return this.completionPostingsFormatProvider;
}
@Override
public void parse(ParseContext context) throws IOException {
XContentParser parser = context.parser();
XContentParser.Token token = parser.currentToken();
String surfaceForm = null;
BytesRef payload = null;
long weight = -1;
List inputs = Lists.newArrayListWithExpectedSize(4);
if (token == XContentParser.Token.VALUE_STRING) {
inputs.add(parser.text());
} else {
String currentFieldName = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
if (!ALLOWED_CONTENT_FIELD_NAMES.contains(currentFieldName)) {
throw new ElasticsearchIllegalArgumentException("Unknown field name[" + currentFieldName + "], must be one of " + ALLOWED_CONTENT_FIELD_NAMES);
}
} else if (Fields.CONTENT_FIELD_NAME_PAYLOAD.equals(currentFieldName)) {
if (!isStoringPayloads()) {
throw new MapperException("Payloads disabled in mapping");
}
if (token == XContentParser.Token.START_OBJECT) {
XContentBuilder payloadBuilder = XContentFactory.contentBuilder(parser.contentType()).copyCurrentStructure(parser);
payload = payloadBuilder.bytes().toBytesRef();
payloadBuilder.close();
} else if (token.isValue()) {
payload = parser.bytesOrNull();
} else {
throw new MapperException("payload doesn't support type " + token);
}
} else if (token == XContentParser.Token.VALUE_STRING) {
if (Fields.CONTENT_FIELD_NAME_OUTPUT.equals(currentFieldName)) {
surfaceForm = parser.text();
}
if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) {
inputs.add(parser.text());
}
} else if (token == XContentParser.Token.VALUE_NUMBER) {
if (Fields.CONTENT_FIELD_NAME_WEIGHT.equals(currentFieldName)) {
NumberType numberType = parser.numberType();
if (NumberType.LONG != numberType && NumberType.INT != numberType) {
throw new ElasticsearchIllegalArgumentException("Weight must be an integer, but was [" + parser.numberValue() + "]");
}
weight = parser.longValue(); // always parse a long to make sure we don't get the overflow value
if (weight < 0 || weight > Integer.MAX_VALUE) {
throw new ElasticsearchIllegalArgumentException("Weight must be in the interval [0..2147483647], but was [" + weight + "]");
}
}
} else if (token == XContentParser.Token.START_ARRAY) {
if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
inputs.add(parser.text());
}
}
}
}
}
payload = payload == null ? EMPTY : payload;
if (surfaceForm == null) { // no surface form use the input
for (String input : inputs) {
BytesRef suggestPayload = analyzingSuggestLookupProvider.buildPayload(new BytesRef(
input), weight, payload);
context.doc().add(getCompletionField(input, suggestPayload));
}
} else {
BytesRef suggestPayload = analyzingSuggestLookupProvider.buildPayload(new BytesRef(
surfaceForm), weight, payload);
for (String input : inputs) {
context.doc().add(getCompletionField(input, suggestPayload));
}
}
}
public Field getCompletionField(String input, BytesRef payload) {
final String originalInput = input;
if (input.length() > maxInputLength) {
final int len = correctSubStringLen(input, Math.min(maxInputLength, input.length()));
input = input.substring(0, len);
}
for (int i = 0; i < input.length(); i++) {
if (isReservedChar(input.charAt(i))) {
throw new ElasticsearchIllegalArgumentException("Illegal input [" + originalInput + "] UTF-16 codepoint [0x"
+ Integer.toHexString((int) input.charAt(i)).toUpperCase(Locale.ROOT)
+ "] at position " + i + " is a reserved character");
}
}
return new SuggestField(names.indexName(), input, this.fieldType, payload, analyzingSuggestLookupProvider);
}
public static int correctSubStringLen(String input, int len) {
if (Character.isHighSurrogate(input.charAt(len - 1))) {
assert input.length() >= len + 1 && Character.isLowSurrogate(input.charAt(len));
return len + 1;
}
return len;
}
public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException {
return analyzingSuggestLookupProvider.buildPayload(
surfaceForm, weight, payload);
}
private static final class SuggestField extends Field {
private final BytesRef payload;
private final CompletionTokenStream.ToFiniteStrings toFiniteStrings;
public SuggestField(String name, Reader value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
super(name, value, type);
this.payload = payload;
this.toFiniteStrings = toFiniteStrings;
}
public SuggestField(String name, String value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
super(name, value, type);
this.payload = payload;
this.toFiniteStrings = toFiniteStrings;
}
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
TokenStream ts = super.tokenStream(analyzer);
return new CompletionTokenStream(ts, payload, toFiniteStrings);
}
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(name())
.field(Fields.TYPE, CONTENT_TYPE);
if (indexAnalyzer.name().equals(searchAnalyzer.name())) {
builder.field(Fields.ANALYZER, indexAnalyzer.name());
} else {
builder.field(Fields.INDEX_ANALYZER.getPreferredName(), indexAnalyzer.name())
.field(Fields.SEARCH_ANALYZER.getPreferredName(), searchAnalyzer.name());
}
builder.field(Fields.PAYLOADS, this.payloads);
builder.field(Fields.PRESERVE_SEPARATORS.getPreferredName(), this.preserveSeparators);
builder.field(Fields.PRESERVE_POSITION_INCREMENTS.getPreferredName(), this.preservePositionIncrements);
builder.field(Fields.MAX_INPUT_LENGTH.getPreferredName(), this.maxInputLength);
return builder.endObject();
}
@Override
protected void parseCreateField(ParseContext context, List fields) throws IOException {
}
@Override
protected String contentType() {
return CONTENT_TYPE;
}
@Override
public boolean isSortable() {
return false;
}
@Override
public boolean hasDocValues() {
return false;
}
@Override
public FieldType defaultFieldType() {
return Defaults.FIELD_TYPE;
}
@Override
public FieldDataType defaultFieldDataType() {
return null;
}
@Override
public String value(Object value) {
if (value == null) {
return null;
}
return value.toString();
}
public boolean isStoringPayloads() {
return payloads;
}
@Override
public void merge(Mapper mergeWith, MergeContext mergeContext) throws MergeMappingException {
super.merge(mergeWith, mergeContext);
CompletionFieldMapper fieldMergeWith = (CompletionFieldMapper) mergeWith;
if (payloads != fieldMergeWith.payloads) {
mergeContext.addConflict("mapper [" + names.fullName() + "] has different payload values");
}
if (preservePositionIncrements != fieldMergeWith.preservePositionIncrements) {
mergeContext.addConflict("mapper [" + names.fullName() + "] has different 'preserve_position_increments' values");
}
if (preserveSeparators != fieldMergeWith.preserveSeparators) {
mergeContext.addConflict("mapper [" + names.fullName() + "] has different 'preserve_separators' values");
}
if (!mergeContext.mergeFlags().simulate()) {
this.maxInputLength = fieldMergeWith.maxInputLength;
}
}
// this should be package private but our tests don't allow it.
public static boolean isReservedChar(char character) {
/* we use 0x001F as a SEP_LABEL in the suggester but we can use the UTF-16 representation since they
* are equivalent. We also don't need to convert the input character to UTF-8 here to check for
* the 0x00 end label since all multi-byte UTF-8 chars start with 0x10 binary so if the UTF-16 CP is == 0x00
* it's the single byte UTF-8 CP */
assert XAnalyzingSuggester.PAYLOAD_SEP == XAnalyzingSuggester.SEP_LABEL; // ensure they are the same!
switch(character) {
case XAnalyzingSuggester.END_BYTE:
case XAnalyzingSuggester.SEP_LABEL:
case XAnalyzingSuggester.HOLE_CHARACTER:
return true;
default:
return false;
}
}
}