All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.index.analysis.AnalysisService Maven / Gradle / Ivy

There is a newer version: 8.14.1
Show newest version
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.analysis;

import com.google.common.collect.ImmutableMap;
import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.index.settings.IndexSettingsService;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;

import java.io.Closeable;
import java.util.Map;

import static com.google.common.collect.Maps.newHashMap;

/**
 * Manages configuring and storing analyzers, tokenizers, char filters and token filters.
 */
public class AnalysisService extends AbstractIndexComponent implements Closeable {

    private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(AnalysisService.class));

    private final ImmutableMap analyzers;
    private final ImmutableMap tokenizers;
    private final ImmutableMap charFilters;
    private final ImmutableMap tokenFilters;

    private final NamedAnalyzer defaultIndexAnalyzer;
    private final NamedAnalyzer defaultSearchAnalyzer;
    private final NamedAnalyzer defaultSearchQuoteAnalyzer;

    public AnalysisService(Index index, Settings indexSettings) {
        this(index, indexSettings, null, null, null, null, null);
    }


    @Inject
    public AnalysisService(Index index, IndexSettingsService indexSettingsService, @Nullable IndicesAnalysisService indicesAnalysisService,
                           @Nullable Map analyzerFactoryFactories,
                           @Nullable Map tokenizerFactoryFactories,
                           @Nullable Map charFilterFactoryFactories,
                           @Nullable Map tokenFilterFactoryFactories) {
        this(index, indexSettingsService.getSettings(), indicesAnalysisService, analyzerFactoryFactories, tokenizerFactoryFactories,
                charFilterFactoryFactories, tokenFilterFactoryFactories);

    }

    //package private for testing
    AnalysisService(Index index, Settings indexSettings, @Nullable IndicesAnalysisService indicesAnalysisService,
                           @Nullable Map analyzerFactoryFactories,
                           @Nullable Map tokenizerFactoryFactories,
                           @Nullable Map charFilterFactoryFactories,
                           @Nullable Map tokenFilterFactoryFactories) {
        super(index, indexSettings);
        Settings defaultSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.indexCreated(indexSettings)).build();
        Map tokenizers = newHashMap();
        if (tokenizerFactoryFactories != null) {
            Map tokenizersSettings = indexSettings.getGroups("index.analysis.tokenizer");
            for (Map.Entry entry : tokenizerFactoryFactories.entrySet()) {
                String tokenizerName = entry.getKey();
                TokenizerFactoryFactory tokenizerFactoryFactory = entry.getValue();

                Settings tokenizerSettings = tokenizersSettings.get(tokenizerName);
                if (tokenizerSettings == null) {
                    tokenizerSettings = defaultSettings;
                }

                TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, tokenizerSettings);
                tokenizers.put(tokenizerName, tokenizerFactory);
            }
        }

        if (indicesAnalysisService != null) {
            for (Map.Entry entry : indicesAnalysisService.tokenizerFactories().entrySet()) {
                String name = entry.getKey();
                if (!tokenizers.containsKey(name)) {
                    tokenizers.put(name, entry.getValue().create(name, defaultSettings));
                }
            }
        }

        this.tokenizers = ImmutableMap.copyOf(tokenizers);

        Map charFilters = newHashMap();
        if (charFilterFactoryFactories != null) {
            Map charFiltersSettings = indexSettings.getGroups("index.analysis.char_filter");
            for (Map.Entry entry : charFilterFactoryFactories.entrySet()) {
                String charFilterName = entry.getKey();
                CharFilterFactoryFactory charFilterFactoryFactory = entry.getValue();

                Settings charFilterSettings = charFiltersSettings.get(charFilterName);
                if (charFilterSettings == null) {
                    charFilterSettings = defaultSettings;
                }

                CharFilterFactory tokenFilterFactory = charFilterFactoryFactory.create(charFilterName, charFilterSettings);
                charFilters.put(charFilterName, tokenFilterFactory);
            }
        }

        if (indicesAnalysisService != null) {
            for (Map.Entry entry : indicesAnalysisService.charFilterFactories().entrySet()) {
                String name = entry.getKey();
                if (!charFilters.containsKey(name)) {
                    charFilters.put(name, entry.getValue().create(name, defaultSettings));
                }
            }
        }

        this.charFilters = ImmutableMap.copyOf(charFilters);

        Map tokenFilters = newHashMap();
        if (tokenFilterFactoryFactories != null) {
            Map tokenFiltersSettings = indexSettings.getGroups("index.analysis.filter");
            for (Map.Entry entry : tokenFilterFactoryFactories.entrySet()) {
                String tokenFilterName = entry.getKey();
                TokenFilterFactoryFactory tokenFilterFactoryFactory = entry.getValue();

                Settings tokenFilterSettings = tokenFiltersSettings.get(tokenFilterName);
                if (tokenFilterSettings == null) {
                    tokenFilterSettings = defaultSettings;
                }

                TokenFilterFactory tokenFilterFactory = tokenFilterFactoryFactory.create(tokenFilterName, tokenFilterSettings);
                tokenFilters.put(tokenFilterName, tokenFilterFactory);
            }
        }

        // pre initialize the globally registered ones into the map
        if (indicesAnalysisService != null) {
            for (Map.Entry entry : indicesAnalysisService.tokenFilterFactories().entrySet()) {
                String name = entry.getKey();
                if (!tokenFilters.containsKey(name)) {
                    tokenFilters.put(name, entry.getValue().create(name, defaultSettings));
                }
            }
        }
        this.tokenFilters = ImmutableMap.copyOf(tokenFilters);

        Map analyzerProviders = newHashMap();
        if (analyzerFactoryFactories != null) {
            Map analyzersSettings = indexSettings.getGroups("index.analysis.analyzer");
            for (Map.Entry entry : analyzerFactoryFactories.entrySet()) {
                String analyzerName = entry.getKey();
                AnalyzerProviderFactory analyzerFactoryFactory = entry.getValue();

                Settings analyzerSettings = analyzersSettings.get(analyzerName);
                if (analyzerSettings == null) {
                    analyzerSettings = defaultSettings;
                }

                AnalyzerProvider analyzerFactory = analyzerFactoryFactory.create(analyzerName, analyzerSettings);
                analyzerProviders.put(analyzerName, analyzerFactory);
            }
        }
        if (indicesAnalysisService != null) {
            for (Map.Entry entry : indicesAnalysisService.analyzerProviderFactories().entrySet()) {
                String name = entry.getKey();
                Version indexVersion = Version.indexCreated(indexSettings);
                if (!analyzerProviders.containsKey(name)) {
                    analyzerProviders.put(name, entry.getValue().create(name, Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, indexVersion).build()));
                }
            }
        }

        if (!analyzerProviders.containsKey("default")) {
            analyzerProviders.put("default", new StandardAnalyzerProvider(index, indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS));
        }
        if (!analyzerProviders.containsKey("default_index")) {
            analyzerProviders.put("default_index", analyzerProviders.get("default"));
        }
        if (!analyzerProviders.containsKey("default_search")) {
            analyzerProviders.put("default_search", analyzerProviders.get("default"));
        }
        if (!analyzerProviders.containsKey("default_search_quoted")) {
            analyzerProviders.put("default_search_quoted", analyzerProviders.get("default_search"));
        }

        Map analyzers = newHashMap();
        for (AnalyzerProvider analyzerFactory : analyzerProviders.values()) {
            /*
             * Lucene defaults positionIncrementGap to 0 in all analyzers but
             * Elasticsearch defaults them to 0 only before version 2.0
             * and 100 afterwards so we override the positionIncrementGap if it
             * doesn't match here.
             */
            int overridePositionIncrementGap = StringFieldMapper.Defaults.positionIncrementGap(Version.indexCreated(indexSettings));
            if (analyzerFactory instanceof CustomAnalyzerProvider) {
                ((CustomAnalyzerProvider) analyzerFactory).build(this);
                /*
                 * Custom analyzers already default to the correct, version
                 * dependent positionIncrementGap and the user is be able to
                 * configure the positionIncrementGap directly on the analyzer so
                 * we disable overriding the positionIncrementGap to preserve the
                 * user's setting.
                 */
                overridePositionIncrementGap = Integer.MIN_VALUE;
            }
            Analyzer analyzerF = analyzerFactory.get();
            if (analyzerF == null) {
                throw new IllegalArgumentException("analyzer [" + analyzerFactory.name() + "] created null analyzer");
            }
            NamedAnalyzer analyzer;
            if (analyzerF instanceof NamedAnalyzer) {
                // if we got a named analyzer back, use it...
                analyzer = (NamedAnalyzer) analyzerF;
                if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) {
                    // unless the positionIncrementGap needs to be overridden
                    analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap);
                }
            } else {
                analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerF, overridePositionIncrementGap);
            }
            analyzers.put(analyzerFactory.name(), analyzer);
            String strAliases = indexSettings.get("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
            if (strAliases != null) {
                for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) {
                    analyzers.put(alias, analyzer);
                }
            }
            String[] aliases = indexSettings.getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
            for (String alias : aliases) {
                analyzers.put(alias, analyzer);
            }
        }

        NamedAnalyzer defaultAnalyzer = analyzers.get("default");
        if (defaultAnalyzer == null) {
            throw new IllegalArgumentException("no default analyzer configured");
        }
        if (analyzers.containsKey("default_index")) {
            deprecationLogger.deprecated("setting [index.analysis.analyzer.default_index] is deprecated, use [index.analysis.analyzer.default] instead for index [{}]", index.getName());
        }
        defaultIndexAnalyzer = analyzers.containsKey("default_index") ? analyzers.get("default_index") : defaultAnalyzer;
        defaultSearchAnalyzer = analyzers.containsKey("default_search") ? analyzers.get("default_search") : defaultAnalyzer;
        defaultSearchQuoteAnalyzer = analyzers.containsKey("default_search_quote") ? analyzers.get("default_search_quote") : defaultSearchAnalyzer;

        for (Map.Entry analyzer : analyzers.entrySet()) {
            if (analyzer.getKey().startsWith("_")) {
                throw new IllegalArgumentException("analyzer name must not start with '_'. got \"" + analyzer.getKey() + "\"");
            }
        }
        this.analyzers = ImmutableMap.copyOf(analyzers);
    }

    @Override
    public void close() {
        for (NamedAnalyzer analyzer : analyzers.values()) {
            if (analyzer.scope() == AnalyzerScope.INDEX) {
                try {
                    analyzer.close();
                } catch (NullPointerException e) {
                    // because analyzers are aliased, they might be closed several times
                    // an NPE is thrown in this case, so ignore....
                } catch (Exception e) {
                    logger.debug("failed to close analyzer " + analyzer);
                }
            }
        }
    }

    public NamedAnalyzer analyzer(String name) {
        NamedAnalyzer analyzer = analyzers.get(name);
        if (analyzer != null) {
            return analyzer;
        }
        String underscoreName = Strings.toUnderscoreCase(name);
        analyzer = analyzers.get(underscoreName);
        if (analyzer != null) {
            DEPRECATION_LOGGER.deprecated("Deprecated analyzer name [" + name + "], use [" + underscoreName + "] instead");
        }
        return analyzer;
    }

    public NamedAnalyzer defaultIndexAnalyzer() {
        return defaultIndexAnalyzer;
    }

    public NamedAnalyzer defaultSearchAnalyzer() {
        return defaultSearchAnalyzer;
    }

    public NamedAnalyzer defaultSearchQuoteAnalyzer() {
        return defaultSearchQuoteAnalyzer;
    }

    public TokenizerFactory tokenizer(String name) {
        TokenizerFactory tokenizerFactory = tokenizers.get(name);
        if (tokenizerFactory != null) {
            return tokenizerFactory;
        }
        String underscoreName = Strings.toUnderscoreCase(name);
        tokenizerFactory = tokenizers.get(underscoreName);
        if (tokenizerFactory != null) {
            DEPRECATION_LOGGER.deprecated("Deprecated tokenizer name [" + name + "], use [" + underscoreName + "] instead");
        }
        return tokenizerFactory;
    }

    public CharFilterFactory charFilter(String name) {
        CharFilterFactory charFilterFactory = charFilters.get(name);
        if (charFilterFactory != null) {
            return charFilterFactory;
        }
        String underscoreName = Strings.toUnderscoreCase(name);
        charFilterFactory = charFilters.get(underscoreName);
        if (charFilterFactory != null) {
            DEPRECATION_LOGGER.deprecated("Deprecated char_filter name [" + name + "], use [" + underscoreName + "] instead");
        }
        return charFilterFactory;
    }

    public TokenFilterFactory tokenFilter(String name) {
        TokenFilterFactory tokenFilterFactory = tokenFilters.get(name);
        if (tokenFilterFactory != null) {
            return tokenFilterFactory;
        }
        String underscoreName = Strings.toUnderscoreCase(name);
        tokenFilterFactory = tokenFilters.get(underscoreName);
        if (tokenFilterFactory != null) {
            DEPRECATION_LOGGER.deprecated("Deprecated token_filter name [" + name + "], use [" + underscoreName + "] instead");
        }
        return tokenFilterFactory;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy