/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.index.mapper;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.shingle.FixedShingleFilter;
import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.queries.spans.FieldMaskingSpanQuery;
import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.queries.spans.SpanOrQuery;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.opensearch.LegacyESVersion;
import org.opensearch.Version;
import org.opensearch.common.collect.Iterators;
import org.opensearch.common.lucene.Lucene;
import org.opensearch.common.lucene.search.AutomatonQueries;
import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.opensearch.common.xcontent.support.XContentMapValues;
import org.opensearch.core.xcontent.ToXContent;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.analysis.AnalyzerScope;
import org.opensearch.index.analysis.IndexAnalyzers;
import org.opensearch.index.analysis.NamedAnalyzer;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.fielddata.plain.PagedBytesIndexFieldData;
import org.opensearch.index.mapper.Mapper.TypeParser.ParserContext;
import org.opensearch.index.query.IntervalBuilder;
import org.opensearch.index.query.IntervalMode;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.index.similarity.SimilarityProvider;
import org.opensearch.search.aggregations.support.CoreValuesSourceType;
import org.opensearch.search.lookup.SearchLookup;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.function.IntPredicate;
import java.util.function.Supplier;
/**
* A {@link FieldMapper} for full-text fields.
*
* @opensearch.internal
*/
public class TextFieldMapper extends ParametrizedFieldMapper {
public static final String CONTENT_TYPE = "text";
protected static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1;
private static final String FAST_PHRASE_SUFFIX = "._index_phrase";
/**
* Default parameters for text fields
*
* @opensearch.internal
*/
public static class Defaults {
public static final double FIELDDATA_MIN_FREQUENCY = 0;
public static final double FIELDDATA_MAX_FREQUENCY = Integer.MAX_VALUE;
public static final int FIELDDATA_MIN_SEGMENT_SIZE = 0;
public static final int INDEX_PREFIX_MIN_CHARS = 2;
public static final int INDEX_PREFIX_MAX_CHARS = 5;
public static final FieldType FIELD_TYPE = new FieldType();
static {
FIELD_TYPE.setTokenized(true);
FIELD_TYPE.setStored(false);
FIELD_TYPE.setStoreTermVectors(false);
FIELD_TYPE.setOmitNorms(false);
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
FIELD_TYPE.freeze();
}
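// In mapping terms (illustrative note, not part of the original source): these
// defaults correspond to a plain { "type": "text" } field -- indexed with
// positions ("index_options": "positions"), not stored, no term vectors, and
// norms enabled.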
/**
* The default position_increment_gap is set to 100 so that phrase
* queries of reasonably high slop will not match across field values.
*/
public static final int POSITION_INCREMENT_GAP = 100;
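// Illustrative example (not from the original source): for a multi-valued field
// ["John Abraham", "Lincoln Smith"], "John" is indexed at position 0 and
// "Abraham" at position 1; with a gap of 100 the next value starts at
// position 102, so a phrase query like "Abraham Lincoln" only matches across
// the two values if its slop is around 100 or more.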
}
private static TextFieldMapper toType(FieldMapper in) {
return (TextFieldMapper) in;
}
/**
* Prefix configuration
*
* @opensearch.internal
*/
protected static final class PrefixConfig implements ToXContent {
final int minChars;
final int maxChars;
PrefixConfig(int minChars, int maxChars) {
this.minChars = minChars;
this.maxChars = maxChars;
if (minChars > maxChars) {
throw new IllegalArgumentException("min_chars [" + minChars + "] must be less than max_chars [" + maxChars + "]");
}
if (minChars < 1) {
throw new IllegalArgumentException("min_chars [" + minChars + "] must be greater than zero");
}
if (maxChars >= 20) {
throw new IllegalArgumentException("max_chars [" + maxChars + "] must be less than 20");
}
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
PrefixConfig that = (PrefixConfig) o;
return minChars == that.minChars && maxChars == that.maxChars;
}
@Override
public int hashCode() {
return Objects.hash(minChars, maxChars);
}
@Override
public String toString() {
return "{ min_chars=" + minChars + ", max_chars=" + maxChars + " }";
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field("min_chars", minChars);
builder.field("max_chars", maxChars);
builder.endObject();
return builder;
}
}
static PrefixConfig parsePrefixConfig(String propName, ParserContext parserContext, Object propNode) {
if (propNode == null) {
return null;
}
Map<?, ?> indexPrefix = (Map<?, ?>) propNode;
int minChars = XContentMapValues.nodeIntegerValue(indexPrefix.remove("min_chars"), Defaults.INDEX_PREFIX_MIN_CHARS);
int maxChars = XContentMapValues.nodeIntegerValue(indexPrefix.remove("max_chars"), Defaults.INDEX_PREFIX_MAX_CHARS);
DocumentMapperParser.checkNoRemainingFields(propName, indexPrefix, parserContext.indexVersionCreated());
return new PrefixConfig(minChars, maxChars);
}
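// Illustrative mapping snippet (assumed, not taken from this file):
//   "index_prefixes": { "min_chars": 2, "max_chars": 5 }
// parsePrefixConfig turns that map into new PrefixConfig(2, 5); omitted keys
// fall back to Defaults.INDEX_PREFIX_MIN_CHARS / INDEX_PREFIX_MAX_CHARS, and
// any leftover keys are rejected by checkNoRemainingFields.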
/**
* Frequency filter for field data
*
* @opensearch.internal
*/
protected static final class FielddataFrequencyFilter implements ToXContent {
final double minFreq;
final double maxFreq;
final int minSegmentSize;
private FielddataFrequencyFilter(double minFreq, double maxFreq, int minSegmentSize) {
this.minFreq = minFreq;
this.maxFreq = maxFreq;
this.minSegmentSize = minSegmentSize;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
FielddataFrequencyFilter that = (FielddataFrequencyFilter) o;
return Double.compare(that.minFreq, minFreq) == 0
&& Double.compare(that.maxFreq, maxFreq) == 0
&& minSegmentSize == that.minSegmentSize;
}
@Override
public int hashCode() {
return Objects.hash(minFreq, maxFreq, minSegmentSize);
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field("min", minFreq);
builder.field("max", maxFreq);
builder.field("min_segment_size", minSegmentSize);
builder.endObject();
return builder;
}
@Override
public String toString() {
return "{ min=" + minFreq + ", max=" + maxFreq + ", min_segment_size=" + minSegmentSize + " }";
}
}
private static final FielddataFrequencyFilter DEFAULT_FILTER = new FielddataFrequencyFilter(
Defaults.FIELDDATA_MIN_FREQUENCY,
Defaults.FIELDDATA_MAX_FREQUENCY,
Defaults.FIELDDATA_MIN_SEGMENT_SIZE
);
private static FielddataFrequencyFilter parseFrequencyFilter(String name, ParserContext parserContext, Object node) {
Map<?, ?> frequencyFilter = (Map<?, ?>) node;
double minFrequency = XContentMapValues.nodeDoubleValue(frequencyFilter.remove("min"), 0);
double maxFrequency = XContentMapValues.nodeDoubleValue(frequencyFilter.remove("max"), Integer.MAX_VALUE);
int minSegmentSize = XContentMapValues.nodeIntegerValue(frequencyFilter.remove("min_segment_size"), 0);
DocumentMapperParser.checkNoRemainingFields(name, frequencyFilter, parserContext.indexVersionCreated());
return new FielddataFrequencyFilter(minFrequency, maxFrequency, minSegmentSize);
}
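// Illustrative mapping snippet (assumed, not taken from this file):
//   "fielddata_frequency_filter": { "min": 0.001, "max": 0.1, "min_segment_size": 500 }
// yields new FielddataFrequencyFilter(0.001, 0.1, 500); fields left out default
// to 0 / Integer.MAX_VALUE / 0, matching DEFAULT_FILTER.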
/**
* Builder for text fields
*
* @opensearch.internal
*/
public static class Builder extends ParametrizedFieldMapper.Builder {
private final Version indexCreatedVersion;
protected final Parameter<Boolean> index = Parameter.indexParam(m -> toType(m).mappedFieldType.isSearchable(), true);
protected final Parameter<Boolean> store = Parameter.storeParam(m -> toType(m).fieldType.stored(), false);
final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> toType(m).similarity);
final Parameter<String> indexOptions = TextParams.indexOptions(m -> toType(m).indexOptions);
final Parameter<Boolean> norms = TextParams.norms(true, m -> toType(m).fieldType.omitNorms() == false);
final Parameter<String> termVectors = TextParams.termVectors(m -> toType(m).termVectors);
final Parameter<Integer> positionIncrementGap = Parameter.intParam(
"position_increment_gap",
false,
m -> toType(m).positionIncrementGap,
POSITION_INCREMENT_GAP_USE_ANALYZER
);
final Parameter<Boolean> fieldData = Parameter.boolParam(
"fielddata",
true,
m -> ((TextFieldType) toType(m).mappedFieldType).fielddata,
false
);
final Parameter<FielddataFrequencyFilter> freqFilter = new Parameter<>(
"fielddata_frequency_filter",
true,
() -> DEFAULT_FILTER,
TextFieldMapper::parseFrequencyFilter,
m -> toType(m).freqFilter
);
final Parameter<Boolean> eagerGlobalOrdinals = Parameter.boolParam(
"eager_global_ordinals",
true,
m -> toType(m).mappedFieldType.eagerGlobalOrdinals(),
false
);
final Parameter<Boolean> indexPhrases = Parameter.boolParam(
"index_phrases",
false,
m -> ((TextFieldType) toType(m).mappedFieldType).indexPhrases,
false
);
final Parameter<PrefixConfig> indexPrefixes = new Parameter<>(
"index_prefixes",
false,
() -> null,
TextFieldMapper::parsePrefixConfig,
m -> Optional.ofNullable(((TextFieldType) toType(m).mappedFieldType).prefixFieldType)
.map(p -> new PrefixConfig(p.minChars, p.maxChars))
.orElse(null)
).acceptsNull();
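// Illustrative mapping (assumed, not from this file) exercising several of the
// parameters declared above:
// {
//   "type": "text",
//   "index_phrases": true,
//   "index_prefixes": { "min_chars": 2, "max_chars": 5 },
//   "fielddata": true,
//   "eager_global_ordinals": true,
//   "position_increment_gap": 100
// }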
protected final Parameter<Float> boost = Parameter.boostParam();
protected final Parameter