
org.elasticsearch.action.admin.cluster.stats.AnalysisStats Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Elasticsearch - Open Source, Distributed, RESTful Search Engine
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.action.admin.cluster.stats;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.MappingMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
/**
* Statistics about analysis usage.
*/
public final class AnalysisStats implements ToXContentFragment, Writeable {
/**
* Create {@link AnalysisStats} from the given cluster state.
*/
public static AnalysisStats of(Metadata metadata, Runnable ensureNotCancelled) {
final Map usedCharFilterTypes = new HashMap<>();
final Map usedTokenizerTypes = new HashMap<>();
final Map usedTokenFilterTypes = new HashMap<>();
final Map usedAnalyzerTypes = new HashMap<>();
final Map usedBuiltInCharFilters = new HashMap<>();
final Map usedBuiltInTokenizers = new HashMap<>();
final Map usedBuiltInTokenFilters = new HashMap<>();
final Map usedBuiltInAnalyzers = new HashMap<>();
final Map mappingCounts = new IdentityHashMap<>(metadata.getMappingsByHash().size());
for (IndexMetadata indexMetadata : metadata) {
ensureNotCancelled.run();
if (indexMetadata.isSystem()) {
// Don't include system indices in statistics about analysis,
// we care about the user's indices.
continue;
}
Set indexCharFilters = new HashSet<>();
Set indexTokenizers = new HashSet<>();
Set indexTokenFilters = new HashSet<>();
Set indexAnalyzerTypes = new HashSet<>();
Set indexCharFilterTypes = new HashSet<>();
Set indexTokenizerTypes = new HashSet<>();
Set indexTokenFilterTypes = new HashSet<>();
Settings indexSettings = indexMetadata.getSettings();
Map analyzerSettings = indexSettings.getGroups("index.analysis.analyzer");
usedBuiltInAnalyzers.keySet().removeAll(analyzerSettings.keySet());
for (Settings analyzerSetting : analyzerSettings.values()) {
final String analyzerType = analyzerSetting.get("type", "custom");
IndexFeatureStats stats = usedAnalyzerTypes.computeIfAbsent(analyzerType, IndexFeatureStats::new);
stats.count++;
if (indexAnalyzerTypes.add(analyzerType)) {
stats.indexCount++;
}
for (String charFilter : analyzerSetting.getAsList("char_filter")) {
stats = usedBuiltInCharFilters.computeIfAbsent(charFilter, IndexFeatureStats::new);
stats.count++;
if (indexCharFilters.add(charFilter)) {
stats.indexCount++;
}
}
String tokenizer = analyzerSetting.get("tokenizer");
if (tokenizer != null) {
stats = usedBuiltInTokenizers.computeIfAbsent(tokenizer, IndexFeatureStats::new);
stats.count++;
if (indexTokenizers.add(tokenizer)) {
stats.indexCount++;
}
}
for (String filter : analyzerSetting.getAsList("filter")) {
stats = usedBuiltInTokenFilters.computeIfAbsent(filter, IndexFeatureStats::new);
stats.count++;
if (indexTokenFilters.add(filter)) {
stats.indexCount++;
}
}
}
Map charFilterSettings = indexSettings.getGroups("index.analysis.char_filter");
usedBuiltInCharFilters.keySet().removeAll(charFilterSettings.keySet());
aggregateAnalysisTypes(charFilterSettings.values(), usedCharFilterTypes, indexCharFilterTypes);
Map tokenizerSettings = indexSettings.getGroups("index.analysis.tokenizer");
usedBuiltInTokenizers.keySet().removeAll(tokenizerSettings.keySet());
aggregateAnalysisTypes(tokenizerSettings.values(), usedTokenizerTypes, indexTokenizerTypes);
Map tokenFilterSettings = indexSettings.getGroups("index.analysis.filter");
usedBuiltInTokenFilters.keySet().removeAll(tokenFilterSettings.keySet());
aggregateAnalysisTypes(tokenFilterSettings.values(), usedTokenFilterTypes, indexTokenFilterTypes);
countMapping(mappingCounts, indexMetadata);
}
for (Map.Entry mappingAndCount : mappingCounts.entrySet()) {
ensureNotCancelled.run();
Set indexAnalyzers = new HashSet<>();
final int count = mappingAndCount.getValue();
MappingVisitor.visitMapping(mappingAndCount.getKey().getSourceAsMap(), (field, fieldMapping) -> {
for (String key : new String[] { "analyzer", "search_analyzer", "search_quote_analyzer" }) {
Object analyzerO = fieldMapping.get(key);
if (analyzerO != null) {
final String analyzer = analyzerO.toString();
IndexFeatureStats stats = usedBuiltInAnalyzers.computeIfAbsent(analyzer, IndexFeatureStats::new);
stats.count += count;
if (indexAnalyzers.add(analyzer)) {
stats.indexCount += count;
}
}
}
});
}
return new AnalysisStats(
usedCharFilterTypes.values(),
usedTokenizerTypes.values(),
usedTokenFilterTypes.values(),
usedAnalyzerTypes.values(),
usedBuiltInCharFilters.values(),
usedBuiltInTokenizers.values(),
usedBuiltInTokenFilters.values(),
usedBuiltInAnalyzers.values()
);
}
public static void countMapping(Map mappingCounts, IndexMetadata indexMetadata) {
final MappingMetadata mappingMetadata = indexMetadata.mapping();
if (mappingMetadata == null) {
return;
}
mappingCounts.compute(mappingMetadata, (k, count) -> count == null ? 1 : count + 1);
}
private static void aggregateAnalysisTypes(
Collection settings,
Map stats,
Set indexTypes
) {
for (Settings analysisComponentSettings : settings) {
final String type = analysisComponentSettings.get("type");
if (type != null) {
IndexFeatureStats s = stats.computeIfAbsent(type, IndexFeatureStats::new);
s.count++;
if (indexTypes.add(type)) {
s.indexCount++;
}
}
}
}
private static Set sort(Collection set) {
List list = new ArrayList<>(set);
list.sort(Comparator.comparing(IndexFeatureStats::getName));
return Collections.unmodifiableSet(new LinkedHashSet<>(list));
}
private final Set usedCharFilters, usedTokenizers, usedTokenFilters, usedAnalyzers;
private final Set usedBuiltInCharFilters, usedBuiltInTokenizers, usedBuiltInTokenFilters, usedBuiltInAnalyzers;
AnalysisStats(
Collection usedCharFilters,
Collection usedTokenizers,
Collection usedTokenFilters,
Collection usedAnalyzers,
Collection usedBuiltInCharFilters,
Collection usedBuiltInTokenizers,
Collection usedBuiltInTokenFilters,
Collection usedBuiltInAnalyzers
) {
this.usedCharFilters = sort(usedCharFilters);
this.usedTokenizers = sort(usedTokenizers);
this.usedTokenFilters = sort(usedTokenFilters);
this.usedAnalyzers = sort(usedAnalyzers);
this.usedBuiltInCharFilters = sort(usedBuiltInCharFilters);
this.usedBuiltInTokenizers = sort(usedBuiltInTokenizers);
this.usedBuiltInTokenFilters = sort(usedBuiltInTokenFilters);
this.usedBuiltInAnalyzers = sort(usedBuiltInAnalyzers);
}
public AnalysisStats(StreamInput input) throws IOException {
usedCharFilters = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
usedTokenizers = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
usedTokenFilters = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
usedAnalyzers = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
usedBuiltInCharFilters = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
usedBuiltInTokenizers = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
usedBuiltInTokenFilters = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
usedBuiltInAnalyzers = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeCollection(usedCharFilters);
out.writeCollection(usedTokenizers);
out.writeCollection(usedTokenFilters);
out.writeCollection(usedAnalyzers);
out.writeCollection(usedBuiltInCharFilters);
out.writeCollection(usedBuiltInTokenizers);
out.writeCollection(usedBuiltInTokenFilters);
out.writeCollection(usedBuiltInAnalyzers);
}
/**
* Return the set of used char filters in the cluster.
*/
public Set getUsedCharFilterTypes() {
return usedCharFilters;
}
/**
* Return the set of used tokenizers in the cluster.
*/
public Set getUsedTokenizerTypes() {
return usedTokenizers;
}
/**
* Return the set of used token filters in the cluster.
*/
public Set getUsedTokenFilterTypes() {
return usedTokenFilters;
}
/**
* Return the set of used analyzers in the cluster.
*/
public Set getUsedAnalyzerTypes() {
return usedAnalyzers;
}
/**
* Return the set of used built-in char filters in the cluster.
*/
public Set getUsedBuiltInCharFilters() {
return usedBuiltInCharFilters;
}
/**
* Return the set of used built-in tokenizers in the cluster.
*/
public Set getUsedBuiltInTokenizers() {
return usedBuiltInTokenizers;
}
/**
* Return the set of used built-in token filters in the cluster.
*/
public Set getUsedBuiltInTokenFilters() {
return usedBuiltInTokenFilters;
}
/**
* Return the set of used built-in analyzers in the cluster.
*/
public Set getUsedBuiltInAnalyzers() {
return usedBuiltInAnalyzers;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
AnalysisStats that = (AnalysisStats) o;
return Objects.equals(usedCharFilters, that.usedCharFilters)
&& Objects.equals(usedTokenizers, that.usedTokenizers)
&& Objects.equals(usedTokenFilters, that.usedTokenFilters)
&& Objects.equals(usedAnalyzers, that.usedAnalyzers)
&& Objects.equals(usedBuiltInCharFilters, that.usedBuiltInCharFilters)
&& Objects.equals(usedBuiltInTokenizers, that.usedBuiltInTokenizers)
&& Objects.equals(usedBuiltInTokenFilters, that.usedBuiltInTokenFilters)
&& Objects.equals(usedBuiltInAnalyzers, that.usedBuiltInAnalyzers);
}
@Override
public int hashCode() {
return Objects.hash(
usedCharFilters,
usedTokenizers,
usedTokenFilters,
usedAnalyzers,
usedBuiltInCharFilters,
usedBuiltInTokenizers,
usedBuiltInTokenFilters,
usedBuiltInAnalyzers
);
}
private static void toXContentCollection(XContentBuilder builder, Params params, String name, Collection extends ToXContent> coll)
throws IOException {
builder.startArray(name);
for (ToXContent toXContent : coll) {
toXContent.toXContent(builder, params);
}
builder.endArray();
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject("analysis");
toXContentCollection(builder, params, "char_filter_types", usedCharFilters);
toXContentCollection(builder, params, "tokenizer_types", usedTokenizers);
toXContentCollection(builder, params, "filter_types", usedTokenFilters);
toXContentCollection(builder, params, "analyzer_types", usedAnalyzers);
toXContentCollection(builder, params, "built_in_char_filters", usedBuiltInCharFilters);
toXContentCollection(builder, params, "built_in_tokenizers", usedBuiltInTokenizers);
toXContentCollection(builder, params, "built_in_filters", usedBuiltInTokenFilters);
toXContentCollection(builder, params, "built_in_analyzers", usedBuiltInAnalyzers);
builder.endObject();
return builder;
}
@Override
public String toString() {
return Strings.toString(this, true, true);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy