/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.opensearch.LegacyESVersion;
import org.opensearch.OpenSearchException;
import org.opensearch.Version;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.core.internal.io.IOUtils;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.mapper.TextFieldMapper;
import org.opensearch.indices.analysis.AnalysisModule;
import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.opensearch.indices.analysis.PreBuiltAnalyzers;
import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
import static java.util.Collections.unmodifiableMap;
/**
* An internal registry for tokenizer, token filter, char filter and analyzer.
* This class exists per node and allows creating per-index {@link IndexAnalyzers} via {@link #build(IndexSettings)}.
*/
public final class AnalysisRegistry implements Closeable {
public static final String INDEX_ANALYSIS_CHAR_FILTER = "index.analysis.char_filter";
public static final String INDEX_ANALYSIS_FILTER = "index.analysis.filter";
public static final String INDEX_ANALYSIS_TOKENIZER = "index.analysis.tokenizer";
public static final String DEFAULT_ANALYZER_NAME = "default";
public static final String DEFAULT_SEARCH_ANALYZER_NAME = "default_search";
public static final String DEFAULT_SEARCH_QUOTED_ANALYZER_NAME = "default_search_quoted";
private final PrebuiltAnalysis prebuiltAnalysis;
private final Map<String, Analyzer> cachedAnalyzer = new ConcurrentHashMap<>();
private final Environment environment;
private final Map<String, AnalysisProvider<CharFilterFactory>> charFilters;
private final Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters;
private final Map<String, AnalysisProvider<TokenizerFactory>> tokenizers;
private final Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers;
private final Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers;
public AnalysisRegistry(
Environment environment,
Map<String, AnalysisProvider<CharFilterFactory>> charFilters,
Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters,
Map<String, AnalysisProvider<TokenizerFactory>> tokenizers,
Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers,
Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers,
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers,
Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers
) {
this.environment = environment;
this.charFilters = unmodifiableMap(charFilters);
this.tokenFilters = unmodifiableMap(tokenFilters);
this.tokenizers = unmodifiableMap(tokenizers);
this.analyzers = unmodifiableMap(analyzers);
this.normalizers = unmodifiableMap(normalizers);
prebuiltAnalysis = new PrebuiltAnalysis(
preConfiguredCharFilters,
preConfiguredTokenFilters,
preConfiguredTokenizers,
preConfiguredAnalyzers
);
}
private static Settings getSettingsFromIndexSettings(IndexSettings indexSettings, String groupName) {
Settings settings = indexSettings.getSettings().getAsSettings(groupName);
if (settings.isEmpty()) {
settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, indexSettings.getIndexVersionCreated()).build();
}
return settings;
}
private static final IndexSettings NO_INDEX_SETTINGS = new IndexSettings(
IndexMetadata.builder(IndexMetadata.INDEX_UUID_NA_VALUE)
.settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT))
.numberOfReplicas(0)
.numberOfShards(1)
.build(),
Settings.EMPTY
);
private <T> T getComponentFactory(
IndexSettings settings,
NameOrDefinition nod,
String componentType,
Function<String, AnalysisProvider<T>> globalComponentProvider,
Function<String, AnalysisProvider<T>> prebuiltComponentProvider,
BiFunction<String, IndexSettings, AnalysisProvider<T>> indexComponentProvider
) throws IOException {
if (nod.definition != null) {
// custom component, so we build it from scratch
String type = nod.definition.get("type");
if (type == null) {
throw new IllegalArgumentException("Missing [type] setting for anonymous " + componentType + ": " + nod.definition);
}
AnalysisProvider<T> factory = globalComponentProvider.apply(type);
if (factory == null) {
throw new IllegalArgumentException("failed to find global " + componentType + " under [" + type + "]");
}
if (settings == null) {
settings = NO_INDEX_SETTINGS;
}
return factory.get(settings, environment, "__anonymous__" + type, nod.definition);
}
if (settings == null) {
// no index provided, so we use prebuilt analysis components
AnalysisProvider<T> factory = prebuiltComponentProvider.apply(nod.name);
if (factory == null) {
// if there's no prebuilt component, try loading a global one to build with no settings
factory = globalComponentProvider.apply(nod.name);
if (factory == null) {
throw new IllegalArgumentException("failed to find global " + componentType + " under [" + nod.name + "]");
}
}
return factory.get(environment, nod.name);
} else {
// get the component from index settings
AnalysisProvider<T> factory = indexComponentProvider.apply(nod.name, settings);
if (factory == null) {
throw new IllegalArgumentException("failed to find " + componentType + " under [" + nod.name + "]");
}
Settings s = getSettingsFromIndexSettings(settings, "index.analysis." + componentType + "." + nod.name);
return factory.get(settings, environment, nod.name, s);
}
}
/**
 * Returns a registered {@link TokenizerFactory} provider by name or <code>null</code> if the tokenizer was not registered
 */
private AnalysisModule.AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer) {
return tokenizers.getOrDefault(tokenizer, this.prebuiltAnalysis.getTokenizerFactory(tokenizer));
}
/**
 * Returns a registered {@link TokenFilterFactory} provider by name or <code>null</code> if the token filter was not registered
 */
private AnalysisModule.AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter) {
return tokenFilters.getOrDefault(tokenFilter, this.prebuiltAnalysis.getTokenFilterFactory(tokenFilter));
}
/**
 * Returns a registered {@link CharFilterFactory} provider by name or <code>null</code> if the char filter was not registered
 */
private AnalysisModule.AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter) {
return charFilters.getOrDefault(charFilter, this.prebuiltAnalysis.getCharFilterFactory(charFilter));
}
/**
 * Returns a registered {@link Analyzer} provider by name or <code>null</code> if the analyzer was not registered
 */
public Analyzer getAnalyzer(String analyzer) throws IOException {
AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> analyzerProvider = this.prebuiltAnalysis.getAnalyzerProvider(analyzer);
if (analyzerProvider == null) {
AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> provider = analyzers.get(analyzer);
return provider == null ? null : cachedAnalyzer.computeIfAbsent(analyzer, (key) -> {
try {
return provider.get(environment, key).get();
} catch (IOException ex) {
throw new OpenSearchException("failed to load analyzer for name " + key, ex);
}
});
} else if ("standard_html_strip".equals(analyzer)) {
if (Version.CURRENT.onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException(
"[standard_html_strip] analyzer is not supported for new indices, "
+ "use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter"
);
}
}
return analyzerProvider.get(environment, analyzer).get();
}
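/*
 * Usage sketch (illustrative only): resolving a pre-built or globally registered
 * analyzer by name. The registry instance name "analysisRegistry" is an assumption
 * for the example; the token-stream consumption mirrors checkVersions() below.
 *
 *   Analyzer standard = analysisRegistry.getAnalyzer("standard");
 *   try (TokenStream ts = standard.tokenStream("field", "Quick Brown Fox")) {
 *       ts.reset();
 *       while (ts.incrementToken()) {
 *           // inspect term attributes here
 *       }
 *       ts.end();
 *   }
 */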
@Override
public void close() throws IOException {
try {
prebuiltAnalysis.close();
} finally {
IOUtils.close(cachedAnalyzer.values());
}
}
/**
* Creates an index-level {@link IndexAnalyzers} from this registry using the given index settings
*/
public IndexAnalyzers build(IndexSettings indexSettings) throws IOException {
final Map<String, CharFilterFactory> charFilterFactories = buildCharFilterFactories(indexSettings);
final Map<String, TokenizerFactory> tokenizerFactories = buildTokenizerFactories(indexSettings);
final Map<String, TokenFilterFactory> tokenFilterFactories = buildTokenFilterFactories(indexSettings);
final Map<String, AnalyzerProvider<?>> analyzerFactories = buildAnalyzerFactories(indexSettings);
final Map<String, AnalyzerProvider<?>> normalizerFactories = buildNormalizerFactories(indexSettings);
return build(indexSettings, analyzerFactories, normalizerFactories, tokenizerFactories, charFilterFactories, tokenFilterFactories);
}
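/*
 * Usage sketch (illustrative only): building the per-index analyzers and looking
 * one up by name. The "indexSettings" value and the "my_analyzer" name are
 * assumptions for the example; IndexAnalyzers is expected to return null for
 * analyzer names that were not configured.
 *
 *   IndexAnalyzers indexAnalyzers = analysisRegistry.build(indexSettings);
 *   NamedAnalyzer defaultAnalyzer = indexAnalyzers.getDefaultIndexAnalyzer();
 *   NamedAnalyzer custom = indexAnalyzers.get("my_analyzer"); // null if not defined
 */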
/**
* Creates a custom analyzer from a collection of {@link NameOrDefinition} specifications for each component
*
* Callers are responsible for closing the returned Analyzer
*/
public NamedAnalyzer buildCustomAnalyzer(
IndexSettings indexSettings,
boolean normalizer,
NameOrDefinition tokenizer,
List<NameOrDefinition> charFilters,
List<NameOrDefinition> tokenFilters
) throws IOException {
TokenizerFactory tokenizerFactory = getComponentFactory(
indexSettings,
tokenizer,
"tokenizer",
this::getTokenizerProvider,
prebuiltAnalysis::getTokenizerFactory,
this::getTokenizerProvider
);
List<CharFilterFactory> charFilterFactories = new ArrayList<>();
for (NameOrDefinition nod : charFilters) {
charFilterFactories.add(
getComponentFactory(
indexSettings,
nod,
"char_filter",
this::getCharFilterProvider,
prebuiltAnalysis::getCharFilterFactory,
this::getCharFilterProvider
)
);
}
List<TokenFilterFactory> tokenFilterFactories = new ArrayList<>();
for (NameOrDefinition nod : tokenFilters) {
TokenFilterFactory tff = getComponentFactory(
indexSettings,
nod,
"filter",
this::getTokenFilterProvider,
prebuiltAnalysis::getTokenFilterFactory,
this::getTokenFilterProvider
);
if (normalizer && tff instanceof NormalizingTokenFilterFactory == false) {
throw new IllegalArgumentException("Custom normalizer may not use filter [" + tff.name() + "]");
}
tff = tff.getChainAwareTokenFilterFactory(tokenizerFactory, charFilterFactories, tokenFilterFactories, name -> {
try {
return getComponentFactory(
indexSettings,
new NameOrDefinition(name),
"filter",
this::getTokenFilterProvider,
prebuiltAnalysis::getTokenFilterFactory,
this::getTokenFilterProvider
);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
tokenFilterFactories.add(tff);
}
Analyzer analyzer = new CustomAnalyzer(
tokenizerFactory,
charFilterFactories.toArray(new CharFilterFactory[] {}),
tokenFilterFactories.toArray(new TokenFilterFactory[] {})
);
return produceAnalyzer("__custom__", new AnalyzerProvider() {
@Override
public String name() {
return "__custom__";
}
@Override
public AnalyzerScope scope() {
return AnalyzerScope.GLOBAL;
}
@Override
public Analyzer get() {
return analyzer;
}
}, null, null, null);
}
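/*
 * Usage sketch (illustrative only): assembling a transient custom analyzer from
 * component names. The component names ("standard", "html_strip", "lowercase")
 * are examples; as the javadoc above notes, the caller is responsible for
 * closing the returned analyzer.
 *
 *   try (NamedAnalyzer custom = analysisRegistry.buildCustomAnalyzer(
 *       indexSettings,
 *       false, // build an analyzer, not a normalizer
 *       new NameOrDefinition("standard"),
 *       List.of(new NameOrDefinition("html_strip")),
 *       List.of(new NameOrDefinition("lowercase"))
 *   )) {
 *       // analyze text via custom.tokenStream(...)
 *   }
 */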
public Map<String, TokenFilterFactory> buildTokenFilterFactories(IndexSettings indexSettings) throws IOException {
final Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER);
return buildMapping(
Component.FILTER,
indexSettings,
tokenFiltersSettings,
this.tokenFilters,
prebuiltAnalysis.preConfiguredTokenFilters
);
}
public Map<String, TokenizerFactory> buildTokenizerFactories(IndexSettings indexSettings) throws IOException {
final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER);
return buildMapping(Component.TOKENIZER, indexSettings, tokenizersSettings, tokenizers, prebuiltAnalysis.preConfiguredTokenizers);
}
public Map<String, CharFilterFactory> buildCharFilterFactories(IndexSettings indexSettings) throws IOException {
final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_CHAR_FILTER);
return buildMapping(
Component.CHAR_FILTER,
indexSettings,
charFiltersSettings,
charFilters,
prebuiltAnalysis.preConfiguredCharFilterFactories
);
}
private Map<String, AnalyzerProvider<?>> buildAnalyzerFactories(IndexSettings indexSettings) throws IOException {
final Map<String, Settings> analyzersSettings = indexSettings.getSettings().getGroups("index.analysis.analyzer");
return buildMapping(Component.ANALYZER, indexSettings, analyzersSettings, analyzers, prebuiltAnalysis.analyzerProviderFactories);
}
private Map<String, AnalyzerProvider<?>> buildNormalizerFactories(IndexSettings indexSettings) throws IOException {
final Map<String, Settings> normalizersSettings = indexSettings.getSettings().getGroups("index.analysis.normalizer");
return buildMapping(Component.NORMALIZER, indexSettings, normalizersSettings, normalizers, Collections.emptyMap());
}
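/*
 * Settings sketch (illustrative only): the kind of per-index settings groups the
 * build*Factories methods above consume. The component names ("my_shingle",
 * "my_analyzer") and the "shingle" filter type are examples; keys follow the
 * "index.analysis.<component>.<name>.*" layout used throughout this class.
 *
 *   Settings indexAnalysis = Settings.builder()
 *       .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
 *       .put("index.analysis.filter.my_shingle.type", "shingle")
 *       .put("index.analysis.analyzer.my_analyzer.type", "custom")
 *       .put("index.analysis.analyzer.my_analyzer.tokenizer", "standard")
 *       .putList("index.analysis.analyzer.my_analyzer.filter", "lowercase", "my_shingle")
 *       .build();
 */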
/**
 * Returns a registered {@link TokenizerFactory} provider by {@link IndexSettings}
 * or a registered {@link TokenizerFactory} provider by predefined name
 * or <code>null</code> if the tokenizer was not registered
 * @param tokenizer global or defined tokenizer name
 * @param indexSettings the index settings
 * @return {@link TokenizerFactory} provider or <code>null</code>
 */
private AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer, IndexSettings indexSettings) {
return getProvider(
Component.TOKENIZER,
tokenizer,
indexSettings,
"index.analysis.tokenizer",
tokenizers,
this::getTokenizerProvider
);
}
/**
 * Returns a registered {@link TokenFilterFactory} provider by {@link IndexSettings}
 * or a registered {@link TokenFilterFactory} provider by predefined name
 * or <code>null</code> if the tokenFilter was not registered
 * @param tokenFilter global or defined tokenFilter name
 * @param indexSettings the index settings
 * @return {@link TokenFilterFactory} provider or <code>null</code>
 */
private AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter, IndexSettings indexSettings) {
return getProvider(
Component.FILTER,
tokenFilter,
indexSettings,
"index.analysis.filter",
tokenFilters,
this::getTokenFilterProvider
);
}
/**
 * Returns a registered {@link CharFilterFactory} provider by {@link IndexSettings}
 * or a registered {@link CharFilterFactory} provider by predefined name
 * or <code>null</code> if the charFilter was not registered
 * @param charFilter global or defined charFilter name
 * @param indexSettings the index settings
 * @return {@link CharFilterFactory} provider or <code>null</code>
 */
private AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter, IndexSettings indexSettings) {
return getProvider(
Component.CHAR_FILTER,
charFilter,
indexSettings,
"index.analysis.char_filter",
charFilters,
this::getCharFilterProvider
);
}
private <T> AnalysisProvider<T> getProvider(
Component componentType,
String componentName,
IndexSettings indexSettings,
String componentSettings,
Map<String, AnalysisProvider<T>> providers,
Function<String, AnalysisProvider<T>> providerFunction
) {
final Map<String, Settings> subSettings = indexSettings.getSettings().getGroups(componentSettings);
if (subSettings.containsKey(componentName)) {
Settings currentSettings = subSettings.get(componentName);
return getAnalysisProvider(componentType, providers, componentName, currentSettings.get("type"));
} else {
return providerFunction.apply(componentName);
}
}
enum Component {
ANALYZER {
@Override
public String toString() {
return "analyzer";
}
},
NORMALIZER {
@Override
public String toString() {
return "normalizer";
}
},
CHAR_FILTER {
@Override
public String toString() {
return "char_filter";
}
},
TOKENIZER {
@Override
public String toString() {
return "tokenizer";
}
},
FILTER {
@Override
public String toString() {
return "filter";
}
};
}
@SuppressWarnings("unchecked")
private <T> Map<String, T> buildMapping(
Component component,
IndexSettings settings,
Map<String, Settings> settingsMap,
Map<String, ? extends AnalysisModule.AnalysisProvider<T>> providerMap,
Map<String, ? extends AnalysisModule.AnalysisProvider<T>> defaultInstance
) throws IOException {
Settings defaultSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, settings.getIndexVersionCreated()).build();
Map<String, T> factories = new HashMap<>();
for (Map.Entry<String, Settings> entry : settingsMap.entrySet()) {
String name = entry.getKey();
Settings currentSettings = entry.getValue();
String typeName = currentSettings.get("type");
if (component == Component.ANALYZER) {
T factory = null;
if (typeName == null) {
if (currentSettings.get("tokenizer") != null) {
factory = (T) new CustomAnalyzerProvider(settings, name, currentSettings);
} else {
throw new IllegalArgumentException(
component + " [" + name + "] " + "must specify either an analyzer type, or a tokenizer"
);
}
} else if (typeName.equals("custom")) {
factory = (T) new CustomAnalyzerProvider(settings, name, currentSettings);
}
if (factory != null) {
factories.put(name, factory);
continue;
}
} else if (component == Component.NORMALIZER) {
if (typeName == null || typeName.equals("custom")) {
T factory = (T) new CustomNormalizerProvider(settings, name, currentSettings);
factories.put(name, factory);
continue;
}
}
AnalysisProvider<T> type = getAnalysisProvider(component, providerMap, name, typeName);
if (type == null) {
throw new IllegalArgumentException("Unknown " + component + " type [" + typeName + "] for [" + name + "]");
}
final T factory = type.get(settings, environment, name, currentSettings);
factories.put(name, factory);
}
// go over the char filters in the bindings and register the ones that are not configured
for (Map.Entry<String, ? extends AnalysisModule.AnalysisProvider<T>> entry : providerMap.entrySet()) {
String name = entry.getKey();
AnalysisProvider<T> provider = entry.getValue();
// we don't want to re-register one that already exists
if (settingsMap.containsKey(name)) {
continue;
}
// check, if it requires settings, then don't register it, we know default has no settings...
if (provider.requiresAnalysisSettings()) {
continue;
}
AnalysisProvider<T> defaultProvider = defaultInstance.get(name);
final T instance;
if (defaultProvider == null) {
instance = provider.get(settings, environment, name, defaultSettings);
} else {
instance = defaultProvider.get(settings, environment, name, defaultSettings);
}
factories.put(name, instance);
}
for (Map.Entry<String, ? extends AnalysisModule.AnalysisProvider<T>> entry : defaultInstance.entrySet()) {
final String name = entry.getKey();
final AnalysisProvider<T> provider = entry.getValue();
factories.putIfAbsent(name, provider.get(settings, environment, name, defaultSettings));
}
return factories;
}
private static <T> AnalysisProvider<T> getAnalysisProvider(
Component component,
Map<String, ? extends AnalysisProvider<T>> providerMap,
String name,
String typeName
) {
if (typeName == null) {
throw new IllegalArgumentException(component + " [" + name + "] must specify either an analyzer type, or a tokenizer");
}
AnalysisProvider<T> type = providerMap.get(typeName);
if (type == null) {
throw new IllegalArgumentException("Unknown " + component + " type [" + typeName + "] for [" + name + "]");
}
return type;
}
private static class PrebuiltAnalysis implements Closeable {
final Map<String, ? extends AnalysisProvider<AnalyzerProvider<?>>> analyzerProviderFactories;
final Map<String, ? extends AnalysisProvider<TokenFilterFactory>> preConfiguredTokenFilters;
final Map<String, ? extends AnalysisProvider<TokenizerFactory>> preConfiguredTokenizers;
final Map<String, ? extends AnalysisProvider<CharFilterFactory>> preConfiguredCharFilterFactories;
private PrebuiltAnalysis(
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers,
Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers
) {
Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = new HashMap<>();
analyzerProviderFactories.putAll(preConfiguredAnalyzers);
// Pre-build analyzers
for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) {
String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT);
analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, preBuiltAnalyzerEnum));
}
this.analyzerProviderFactories = Collections.unmodifiableMap(analyzerProviderFactories);
this.preConfiguredCharFilterFactories = preConfiguredCharFilters;
this.preConfiguredTokenFilters = preConfiguredTokenFilters;
this.preConfiguredTokenizers = preConfiguredTokenizers;
}
AnalysisProvider<CharFilterFactory> getCharFilterFactory(String name) {
return preConfiguredCharFilterFactories.get(name);
}
AnalysisProvider<TokenFilterFactory> getTokenFilterFactory(String name) {
return preConfiguredTokenFilters.get(name);
}
AnalysisProvider<TokenizerFactory> getTokenizerFactory(String name) {
return preConfiguredTokenizers.get(name);
}
AnalysisProvider<AnalyzerProvider<?>> getAnalyzerProvider(String name) {
return analyzerProviderFactories.get(name);
}
@Override
public void close() throws IOException {
IOUtils.close(
analyzerProviderFactories.values().stream().map((a) -> ((PreBuiltAnalyzerProviderFactory) a)).collect(Collectors.toList())
);
}
}
public IndexAnalyzers build(
IndexSettings indexSettings,
Map<String, AnalyzerProvider<?>> analyzerProviders,
Map<String, AnalyzerProvider<?>> normalizerProviders,
Map<String, TokenizerFactory> tokenizerFactoryFactories,
Map<String, CharFilterFactory> charFilterFactoryFactories,
Map<String, TokenFilterFactory> tokenFilterFactoryFactories
) {
Map<String, NamedAnalyzer> analyzers = new HashMap<>();
Map<String, NamedAnalyzer> normalizers = new HashMap<>();
Map<String, NamedAnalyzer> whitespaceNormalizers = new HashMap<>();
for (Map.Entry<String, AnalyzerProvider<?>> entry : analyzerProviders.entrySet()) {
analyzers.merge(
entry.getKey(),
produceAnalyzer(
entry.getKey(),
entry.getValue(),
tokenFilterFactoryFactories,
charFilterFactoryFactories,
tokenizerFactoryFactories
),
(k, v) -> { throw new IllegalStateException("already registered analyzer with name: " + entry.getKey()); }
);
}
for (Map.Entry<String, AnalyzerProvider<?>> entry : normalizerProviders.entrySet()) {
processNormalizerFactory(
entry.getKey(),
entry.getValue(),
normalizers,
TokenizerFactory.newFactory("keyword", KeywordTokenizer::new),
tokenFilterFactoryFactories,
charFilterFactoryFactories
);
processNormalizerFactory(
entry.getKey(),
entry.getValue(),
whitespaceNormalizers,
TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new),
tokenFilterFactoryFactories,
charFilterFactoryFactories
);
}
for (Analyzer analyzer : normalizers.values()) {
analyzer.normalize("", ""); // check for deprecations
}
if (!analyzers.containsKey(DEFAULT_ANALYZER_NAME)) {
analyzers.put(
DEFAULT_ANALYZER_NAME,
produceAnalyzer(
DEFAULT_ANALYZER_NAME,
new StandardAnalyzerProvider(indexSettings, null, DEFAULT_ANALYZER_NAME, Settings.Builder.EMPTY_SETTINGS),
tokenFilterFactoryFactories,
charFilterFactoryFactories,
tokenizerFactoryFactories
)
);
}
NamedAnalyzer defaultAnalyzer = analyzers.get(DEFAULT_ANALYZER_NAME);
if (defaultAnalyzer == null) {
throw new IllegalArgumentException("no default analyzer configured");
}
defaultAnalyzer.checkAllowedInMode(AnalysisMode.ALL);
if (analyzers.containsKey("default_index")) {
throw new IllegalArgumentException(
"setting [index.analysis.analyzer.default_index] is not supported anymore, use "
+ "[index.analysis.analyzer.default] instead for index ["
+ indexSettings.getIndex().getName()
+ "]"
);
}
for (Map.Entry<String, NamedAnalyzer> analyzer : analyzers.entrySet()) {
if (analyzer.getKey().startsWith("_")) {
throw new IllegalArgumentException("analyzer name must not start with '_'. got \"" + analyzer.getKey() + "\"");
}
}
return new IndexAnalyzers(analyzers, normalizers, whitespaceNormalizers);
}
private static NamedAnalyzer produceAnalyzer(
String name,
AnalyzerProvider<?> analyzerFactory,
Map<String, TokenFilterFactory> tokenFilters,
Map<String, CharFilterFactory> charFilters,
Map<String, TokenizerFactory> tokenizers
) {
/*
* Lucene defaults positionIncrementGap to 0 in all analyzers but
* Elasticsearch defaults them to 0 only before version 2.0
* and 100 afterwards so we override the positionIncrementGap if it
* doesn't match here.
*/
int overridePositionIncrementGap = TextFieldMapper.Defaults.POSITION_INCREMENT_GAP;
if (analyzerFactory instanceof CustomAnalyzerProvider) {
((CustomAnalyzerProvider) analyzerFactory).build(tokenizers, charFilters, tokenFilters);
/*
* Custom analyzers already default to the correct, version
* dependent positionIncrementGap and the user is able to
* configure the positionIncrementGap directly on the analyzer so
* we disable overriding the positionIncrementGap to preserve the
* user's setting.
*/
overridePositionIncrementGap = Integer.MIN_VALUE;
}
Analyzer analyzerF = analyzerFactory.get();
if (analyzerF == null) {
throw new IllegalArgumentException("analyzer [" + analyzerFactory.name() + "] created null analyzer");
}
NamedAnalyzer analyzer;
if (analyzerF instanceof NamedAnalyzer) {
// if we got a named analyzer back, use it...
analyzer = (NamedAnalyzer) analyzerF;
if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) {
// unless the positionIncrementGap needs to be overridden
analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap);
}
} else {
analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap);
}
checkVersions(analyzer);
return analyzer;
}
private void processNormalizerFactory(
String name,
AnalyzerProvider<?> normalizerFactory,
Map<String, NamedAnalyzer> normalizers,
TokenizerFactory tokenizerFactory,
Map<String, TokenFilterFactory> tokenFilters,
Map<String, CharFilterFactory> charFilters
) {
if (tokenizerFactory == null) {
throw new IllegalStateException("keyword tokenizer factory is null, normalizers require analysis-common module");
}
if (normalizerFactory instanceof CustomNormalizerProvider) {
((CustomNormalizerProvider) normalizerFactory).build(tokenizerFactory, charFilters, tokenFilters);
}
if (normalizers.containsKey(name)) {
throw new IllegalStateException("already registered analyzer with name: " + name);
}
Analyzer normalizerF = normalizerFactory.get();
if (normalizerF == null) {
throw new IllegalArgumentException("normalizer [" + normalizerFactory.name() + "] created null normalizer");
}
NamedAnalyzer normalizer = new NamedAnalyzer(name, normalizerFactory.scope(), normalizerF);
normalizers.put(name, normalizer);
}
// Some analysis components emit deprecation warnings or throw exceptions when used
// with the wrong version of opensearch. These exceptions and warnings are
// normally thrown when tokenstreams are constructed, which unless we build a
// tokenstream up-front does not happen until a document is indexed. In order to
// surface these warnings or exceptions as early as possible, we build an empty
// tokenstream and pull it through an Analyzer at construction time.
private static void checkVersions(Analyzer analyzer) {
try (TokenStream ts = analyzer.tokenStream("", "")) {
ts.reset();
while (ts.incrementToken()) {
}
ts.end();
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
}