/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*
 * Modifications Copyright OpenSearch Contributors. See
 * GitHub history for details.
 */

package org.opensearch.action.admin.cluster.stats;

import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.metadata.MappingMetadata;
import org.opensearch.common.annotation.PublicApi;
import org.opensearch.common.settings.Settings;
import org.opensearch.core.common.Strings;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.common.io.stream.Writeable;
import org.opensearch.core.xcontent.MediaTypeRegistry;
import org.opensearch.core.xcontent.ToXContent;
import org.opensearch.core.xcontent.ToXContentFragment;
import org.opensearch.core.xcontent.XContentBuilder;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
 * Statistics about analysis usage.
 *
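 * <p>A minimal usage sketch (it assumes the caller already holds a {@link ClusterState},
 * for example from {@code clusterService.state()}; obtaining that state is outside this class):
 * <pre>{@code
 * AnalysisStats stats = AnalysisStats.of(clusterState);
 * Set<IndexFeatureStats> builtInAnalyzers = stats.getUsedBuiltInAnalyzers();
 * }</pre>
 *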
 * @opensearch.api
 */
@PublicApi(since = "1.0.0")
public final class AnalysisStats implements ToXContentFragment, Writeable {

    /**
     * Create {@link AnalysisStats} from the given cluster state.
     */
    public static AnalysisStats of(ClusterState state) {
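        // The first four maps count custom component *types* (e.g. "pattern_replace", "ngram");
        // the last four count component *names* that are not defined in the index settings and
        // are therefore assumed to be built in.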
        final Map<String, IndexFeatureStats> usedCharFilterTypes = new HashMap<>();
        final Map<String, IndexFeatureStats> usedTokenizerTypes = new HashMap<>();
        final Map<String, IndexFeatureStats> usedTokenFilterTypes = new HashMap<>();
        final Map<String, IndexFeatureStats> usedAnalyzerTypes = new HashMap<>();
        final Map<String, IndexFeatureStats> usedBuiltInCharFilters = new HashMap<>();
        final Map<String, IndexFeatureStats> usedBuiltInTokenizers = new HashMap<>();
        final Map<String, IndexFeatureStats> usedBuiltInTokenFilters = new HashMap<>();
        final Map<String, IndexFeatureStats> usedBuiltInAnalyzers = new HashMap<>();

        for (IndexMetadata indexMetadata : state.metadata()) {
            Set<String> indexAnalyzers = new HashSet<>();
            MappingMetadata mappingMetadata = indexMetadata.mapping();
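            // Record analyzer names referenced directly by field mappings via the
            // "analyzer", "search_analyzer" and "search_quote_analyzer" properties.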
            if (mappingMetadata != null) {
                MappingVisitor.visitMapping(mappingMetadata.getSourceAsMap(), fieldMapping -> {
                    for (String key : new String[] { "analyzer", "search_analyzer", "search_quote_analyzer" }) {
                        Object analyzerO = fieldMapping.get(key);
                        if (analyzerO != null) {
                            final String analyzer = analyzerO.toString();
                            IndexFeatureStats stats = usedBuiltInAnalyzers.computeIfAbsent(analyzer, IndexFeatureStats::new);
                            stats.count++;
                            if (indexAnalyzers.add(analyzer)) {
                                stats.indexCount++;
                            }
                        }
                    }
                });
            }

            Set<String> indexCharFilters = new HashSet<>();
            Set<String> indexTokenizers = new HashSet<>();
            Set<String> indexTokenFilters = new HashSet<>();

            Set<String> indexAnalyzerTypes = new HashSet<>();
            Set<String> indexCharFilterTypes = new HashSet<>();
            Set<String> indexTokenizerTypes = new HashSet<>();
            Set<String> indexTokenFilterTypes = new HashSet<>();

            Settings indexSettings = indexMetadata.getSettings();
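            // getGroups returns one Settings object per named analyzer under
            // "index.analysis.analyzer.<name>.*"; any analyzer defined there is custom,
            // so its name is pruned from the built-in analyzer counts.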
            Map<String, Settings> analyzerSettings = indexSettings.getGroups("index.analysis.analyzer");
            usedBuiltInAnalyzers.keySet().removeAll(analyzerSettings.keySet());
            for (Settings analyzerSetting : analyzerSettings.values()) {
                final String analyzerType = analyzerSetting.get("type", "custom");
                IndexFeatureStats stats = usedAnalyzerTypes.computeIfAbsent(analyzerType, IndexFeatureStats::new);
                stats.count++;
                if (indexAnalyzerTypes.add(analyzerType)) {
                    stats.indexCount++;
                }

                for (String charFilter : analyzerSetting.getAsList("char_filter")) {
                    stats = usedBuiltInCharFilters.computeIfAbsent(charFilter, IndexFeatureStats::new);
                    stats.count++;
                    if (indexCharFilters.add(charFilter)) {
                        stats.indexCount++;
                    }
                }

                String tokenizer = analyzerSetting.get("tokenizer");
                if (tokenizer != null) {
                    stats = usedBuiltInTokenizers.computeIfAbsent(tokenizer, IndexFeatureStats::new);
                    stats.count++;
                    if (indexTokenizers.add(tokenizer)) {
                        stats.indexCount++;
                    }
                }

                for (String filter : analyzerSetting.getAsList("filter")) {
                    stats = usedBuiltInTokenFilters.computeIfAbsent(filter, IndexFeatureStats::new);
                    stats.count++;
                    if (indexTokenFilters.add(filter)) {
                        stats.indexCount++;
                    }
                }
            }

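            // Custom char filters, tokenizers and token filters: count their configured types
            // and prune their names from the corresponding built-in maps.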
            Map<String, Settings> charFilterSettings = indexSettings.getGroups("index.analysis.char_filter");
            usedBuiltInCharFilters.keySet().removeAll(charFilterSettings.keySet());
            aggregateAnalysisTypes(charFilterSettings.values(), usedCharFilterTypes, indexCharFilterTypes);

            Map<String, Settings> tokenizerSettings = indexSettings.getGroups("index.analysis.tokenizer");
            usedBuiltInTokenizers.keySet().removeAll(tokenizerSettings.keySet());
            aggregateAnalysisTypes(tokenizerSettings.values(), usedTokenizerTypes, indexTokenizerTypes);

            Map<String, Settings> tokenFilterSettings = indexSettings.getGroups("index.analysis.filter");
            usedBuiltInTokenFilters.keySet().removeAll(tokenFilterSettings.keySet());
            aggregateAnalysisTypes(tokenFilterSettings.values(), usedTokenFilterTypes, indexTokenFilterTypes);
        }
        return new AnalysisStats(
            usedCharFilterTypes.values(),
            usedTokenizerTypes.values(),
            usedTokenFilterTypes.values(),
            usedAnalyzerTypes.values(),
            usedBuiltInCharFilters.values(),
            usedBuiltInTokenizers.values(),
            usedBuiltInTokenFilters.values(),
            usedBuiltInAnalyzers.values()
        );
    }

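    // For each component definition, "count" is incremented once per definition across the cluster,
    // while "indexCount" is incremented at most once per index for a given type.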
    private static void aggregateAnalysisTypes(
        Collection<Settings> settings,
        Map<String, IndexFeatureStats> stats,
        Set<String> indexTypes
    ) {
        for (Settings analysisComponentSettings : settings) {
            final String type = analysisComponentSettings.get("type");
            if (type != null) {
                IndexFeatureStats s = stats.computeIfAbsent(type, IndexFeatureStats::new);
                s.count++;
                if (indexTypes.add(type)) {
                    s.indexCount++;
                }
            }
        }
    }

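    // Sort by component name so that serialization and XContent output are deterministic.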
    private static Set<IndexFeatureStats> sort(Collection<IndexFeatureStats> set) {
        List<IndexFeatureStats> list = new ArrayList<>(set);
        list.sort(Comparator.comparing(IndexFeatureStats::getName));
        return Collections.unmodifiableSet(new LinkedHashSet<>(list));
    }

    private final Set<IndexFeatureStats> usedCharFilters, usedTokenizers, usedTokenFilters, usedAnalyzers;
    private final Set<IndexFeatureStats> usedBuiltInCharFilters, usedBuiltInTokenizers, usedBuiltInTokenFilters, usedBuiltInAnalyzers;

    AnalysisStats(
        Collection<IndexFeatureStats> usedCharFilters,
        Collection<IndexFeatureStats> usedTokenizers,
        Collection<IndexFeatureStats> usedTokenFilters,
        Collection<IndexFeatureStats> usedAnalyzers,
        Collection<IndexFeatureStats> usedBuiltInCharFilters,
        Collection<IndexFeatureStats> usedBuiltInTokenizers,
        Collection<IndexFeatureStats> usedBuiltInTokenFilters,
        Collection<IndexFeatureStats> usedBuiltInAnalyzers
    ) {
        this.usedCharFilters = sort(usedCharFilters);
        this.usedTokenizers = sort(usedTokenizers);
        this.usedTokenFilters = sort(usedTokenFilters);
        this.usedAnalyzers = sort(usedAnalyzers);
        this.usedBuiltInCharFilters = sort(usedBuiltInCharFilters);
        this.usedBuiltInTokenizers = sort(usedBuiltInTokenizers);
        this.usedBuiltInTokenFilters = sort(usedBuiltInTokenFilters);
        this.usedBuiltInAnalyzers = sort(usedBuiltInAnalyzers);
    }

    public AnalysisStats(StreamInput input) throws IOException {
        usedCharFilters = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
        usedTokenizers = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
        usedTokenFilters = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
        usedAnalyzers = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
        usedBuiltInCharFilters = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
        usedBuiltInTokenizers = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
        usedBuiltInTokenFilters = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
        usedBuiltInAnalyzers = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
    }

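    // Note: the write order here must match the read order in the StreamInput constructor above.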
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeCollection(usedCharFilters);
        out.writeCollection(usedTokenizers);
        out.writeCollection(usedTokenFilters);
        out.writeCollection(usedAnalyzers);
        out.writeCollection(usedBuiltInCharFilters);
        out.writeCollection(usedBuiltInTokenizers);
        out.writeCollection(usedBuiltInTokenFilters);
        out.writeCollection(usedBuiltInAnalyzers);
    }

    /**
     * Return the set of used char filters in the cluster.
     */
    public Set<IndexFeatureStats> getUsedCharFilterTypes() {
        return usedCharFilters;
    }

    /**
     * Return the set of used tokenizers in the cluster.
     */
    public Set<IndexFeatureStats> getUsedTokenizerTypes() {
        return usedTokenizers;
    }

    /**
     * Return the set of used token filters in the cluster.
     */
    public Set<IndexFeatureStats> getUsedTokenFilterTypes() {
        return usedTokenFilters;
    }

    /**
     * Return the set of used analyzers in the cluster.
     */
    public Set<IndexFeatureStats> getUsedAnalyzerTypes() {
        return usedAnalyzers;
    }

    /**
     * Return the set of used built-in char filters in the cluster.
     */
    public Set<IndexFeatureStats> getUsedBuiltInCharFilters() {
        return usedBuiltInCharFilters;
    }

    /**
     * Return the set of used built-in tokenizers in the cluster.
     */
    public Set<IndexFeatureStats> getUsedBuiltInTokenizers() {
        return usedBuiltInTokenizers;
    }

    /**
     * Return the set of used built-in token filters in the cluster.
     */
    public Set<IndexFeatureStats> getUsedBuiltInTokenFilters() {
        return usedBuiltInTokenFilters;
    }

    /**
     * Return the set of used built-in analyzers in the cluster.
     */
    public Set<IndexFeatureStats> getUsedBuiltInAnalyzers() {
        return usedBuiltInAnalyzers;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        AnalysisStats that = (AnalysisStats) o;
        return Objects.equals(usedCharFilters, that.usedCharFilters)
            && Objects.equals(usedTokenizers, that.usedTokenizers)
            && Objects.equals(usedTokenFilters, that.usedTokenFilters)
            && Objects.equals(usedAnalyzers, that.usedAnalyzers)
            && Objects.equals(usedBuiltInCharFilters, that.usedBuiltInCharFilters)
            && Objects.equals(usedBuiltInTokenizers, that.usedBuiltInTokenizers)
            && Objects.equals(usedBuiltInTokenFilters, that.usedBuiltInTokenFilters)
            && Objects.equals(usedBuiltInAnalyzers, that.usedBuiltInAnalyzers);
    }

    @Override
    public int hashCode() {
        return Objects.hash(
            usedCharFilters,
            usedTokenizers,
            usedTokenFilters,
            usedAnalyzers,
            usedBuiltInCharFilters,
            usedBuiltInTokenizers,
            usedBuiltInTokenFilters,
            usedBuiltInAnalyzers
        );
    }

    private void toXContentCollection(XContentBuilder builder, Params params, String name, Collection<? extends ToXContent> coll)
        throws IOException {
        builder.startArray(name);
        for (ToXContent toXContent : coll) {
            toXContent.toXContent(builder, params);
        }
        builder.endArray();
    }

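    // Renders all eight collections as arrays inside a single "analysis" object; the per-entry
    // fields are delegated to IndexFeatureStats.toXContent, so the overall shape is roughly:
    //   "analysis": {
    //     "char_filter_types": [...], "tokenizer_types": [...], "filter_types": [...],
    //     "analyzer_types": [...], "built_in_char_filters": [...], "built_in_tokenizers": [...],
    //     "built_in_filters": [...], "built_in_analyzers": [...]
    //   }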
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject("analysis");
        toXContentCollection(builder, params, "char_filter_types", usedCharFilters);
        toXContentCollection(builder, params, "tokenizer_types", usedTokenizers);
        toXContentCollection(builder, params, "filter_types", usedTokenFilters);
        toXContentCollection(builder, params, "analyzer_types", usedAnalyzers);
        toXContentCollection(builder, params, "built_in_char_filters", usedBuiltInCharFilters);
        toXContentCollection(builder, params, "built_in_tokenizers", usedBuiltInTokenizers);
        toXContentCollection(builder, params, "built_in_filters", usedBuiltInTokenFilters);
        toXContentCollection(builder, params, "built_in_analyzers", usedBuiltInAnalyzers);
        builder.endObject();
        return builder;
    }

    @Override
    public String toString() {
        return Strings.toString(MediaTypeRegistry.JSON, this, true, true);
    }
}