// org.elasticsearch.client.ml.job.config.AnalysisConfig
// (source file from the elasticsearch-rest-high-level-client artifact)
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.client.ml.job.config;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.xcontent.ConstructingObjectParser;
import org.elasticsearch.xcontent.ObjectParser;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
/**
 * Analysis configuration options that describe which fields are
 * analyzed and which functions are used to detect anomalies.
 * <p>
 * The configuration can contain multiple detectors; a new anomaly detector will
 * be created for each detector configuration. The fields
 * {@code bucketSpan}, {@code summaryCountFieldName} and {@code categorizationFieldName}
 * apply to all detectors.
 * <p>
 * If a value has not been set it will be {@code null}. Object wrappers are used
 * around integral types and booleans so they can take {@code null} values.
 * <p>
 * Instances are created through {@link Builder}; the constructor is private and
 * every field is final.
 */
public class AnalysisConfig implements ToXContentObject {

    /**
     * Serialisation names
     */
    public static final ParseField ANALYSIS_CONFIG = new ParseField("analysis_config");
    public static final ParseField BUCKET_SPAN = new ParseField("bucket_span");
    public static final ParseField CATEGORIZATION_FIELD_NAME = new ParseField("categorization_field_name");
    public static final ParseField CATEGORIZATION_FILTERS = new ParseField("categorization_filters");
    public static final ParseField CATEGORIZATION_ANALYZER = CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER;
    public static final ParseField PER_PARTITION_CATEGORIZATION = new ParseField("per_partition_categorization");
    public static final ParseField LATENCY = new ParseField("latency");
    public static final ParseField SUMMARY_COUNT_FIELD_NAME = new ParseField("summary_count_field_name");
    public static final ParseField DETECTORS = new ParseField("detectors");
    public static final ParseField INFLUENCERS = new ParseField("influencers");
    public static final ParseField MULTIVARIATE_BY_FIELDS = new ParseField("multivariate_by_fields");
    public static final ParseField MODEL_PRUNE_WINDOW = new ParseField("model_prune_window");

    /**
     * Parser producing a {@link Builder} (not a finished {@link AnalysisConfig}); callers invoke
     * {@link Builder#build()} themselves. The detector list is the single constructor argument,
     * all other fields are applied through the builder's setters.
     * <p>
     * The {@code true} constructor argument is the parser's {@code ignoreUnknownFields} flag.
     */
    // The cast is unchecked only because constructor args arrive as Object[]; a[0] is the list
    // collected for DETECTORS by the declareObjectArray call in the static initializer below.
    @SuppressWarnings("unchecked")
    public static final ConstructingObjectParser<Builder, Void> PARSER = new ConstructingObjectParser<>(
        ANALYSIS_CONFIG.getPreferredName(),
        true,
        a -> new AnalysisConfig.Builder((List<Detector>) a[0])
    );

    static {
        PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), (p, c) -> (Detector.PARSER).apply(p, c).build(), DETECTORS);
        PARSER.declareString(
            (builder, val) -> builder.setBucketSpan(TimeValue.parseTimeValue(val, BUCKET_SPAN.getPreferredName())),
            BUCKET_SPAN
        );
        PARSER.declareString(Builder::setCategorizationFieldName, CATEGORIZATION_FIELD_NAME);
        PARSER.declareStringArray(Builder::setCategorizationFilters, CATEGORIZATION_FILTERS);
        // This one is nasty - the syntax for analyzers takes either names or objects at many levels, hence it's not
        // possible to simply declare whether the field is a string or object and a completely custom parser is required
        PARSER.declareField(
            Builder::setCategorizationAnalyzerConfig,
            (p, c) -> CategorizationAnalyzerConfig.buildFromXContentFragment(p),
            CATEGORIZATION_ANALYZER,
            ObjectParser.ValueType.OBJECT_OR_STRING
        );
        PARSER.declareObject(
            Builder::setPerPartitionCategorizationConfig,
            PerPartitionCategorizationConfig.PARSER,
            PER_PARTITION_CATEGORIZATION
        );
        PARSER.declareString((builder, val) -> builder.setLatency(TimeValue.parseTimeValue(val, LATENCY.getPreferredName())), LATENCY);
        PARSER.declareString(Builder::setSummaryCountFieldName, SUMMARY_COUNT_FIELD_NAME);
        PARSER.declareStringArray(Builder::setInfluencers, INFLUENCERS);
        PARSER.declareBoolean(Builder::setMultivariateByFields, MULTIVARIATE_BY_FIELDS);
        PARSER.declareString(
            (builder, val) -> builder.setModelPruneWindow(TimeValue.parseTimeValue(val, MODEL_PRUNE_WINDOW.getPreferredName())),
            MODEL_PRUNE_WINDOW
        );
    }

    /**
     * These values apply to all detectors
     */
    private final TimeValue bucketSpan;
    private final String categorizationFieldName;
    private final List<String> categorizationFilters;
    private final CategorizationAnalyzerConfig categorizationAnalyzerConfig;
    private final PerPartitionCategorizationConfig perPartitionCategorizationConfig;
    private final TimeValue latency;
    private final String summaryCountFieldName;
    private final List<Detector> detectors;
    private final List<String> influencers;
    private final Boolean multivariateByFields;
    private final TimeValue modelPruneWindow;

    /**
     * Creates the configuration from the values held by a {@link Builder}.
     * <p>
     * {@code detectors} and {@code influencers} must not be {@code null};
     * {@code categorizationFilters} may be. All three lists are copied, so the new
     * instance does not share list state with the builder or its callers.
     */
    private AnalysisConfig(
        TimeValue bucketSpan,
        String categorizationFieldName,
        List<String> categorizationFilters,
        CategorizationAnalyzerConfig categorizationAnalyzerConfig,
        PerPartitionCategorizationConfig perPartitionCategorizationConfig,
        TimeValue latency,
        String summaryCountFieldName,
        List<Detector> detectors,
        List<String> influencers,
        Boolean multivariateByFields,
        TimeValue modelPruneWindow
    ) {
        // Copy before wrapping: unmodifiableList on its own is just a read-only view, so without
        // the copy a later Builder.setDetector(...) or a change to a caller-supplied list would
        // alter this already-built instance.
        this.detectors = Collections.unmodifiableList(new ArrayList<>(detectors));
        this.bucketSpan = bucketSpan;
        this.latency = latency;
        this.categorizationFieldName = categorizationFieldName;
        this.categorizationAnalyzerConfig = categorizationAnalyzerConfig;
        this.perPartitionCategorizationConfig = perPartitionCategorizationConfig;
        this.categorizationFilters = categorizationFilters == null
            ? null
            : Collections.unmodifiableList(new ArrayList<>(categorizationFilters));
        this.summaryCountFieldName = summaryCountFieldName;
        this.influencers = Collections.unmodifiableList(new ArrayList<>(influencers));
        this.multivariateByFields = multivariateByFields;
        this.modelPruneWindow = modelPruneWindow;
    }

    /**
     * The analysis bucket span
     *
     * @return The bucketspan or {@code null} if not set
     */
    public TimeValue getBucketSpan() {
        return bucketSpan;
    }

    /**
     * @return The categorization field name or {@code null} if not set
     */
    public String getCategorizationFieldName() {
        return categorizationFieldName;
    }

    /**
     * @return An unmodifiable list of the categorization filters or {@code null} if not set
     */
    public List<String> getCategorizationFilters() {
        return categorizationFilters;
    }

    /**
     * @return The categorization analyzer configuration or {@code null} if not set
     */
    public CategorizationAnalyzerConfig getCategorizationAnalyzerConfig() {
        return categorizationAnalyzerConfig;
    }

    /**
     * @return The per-partition categorization configuration or {@code null} if not set
     */
    public PerPartitionCategorizationConfig getPerPartitionCategorizationConfig() {
        return perPartitionCategorizationConfig;
    }

    /**
     * The latency interval during which out-of-order records should be handled.
     *
     * @return The latency interval or {@code null} if not set
     */
    public TimeValue getLatency() {
        return latency;
    }

    /**
     * The name of the field that contains counts for pre-summarised input
     *
     * @return The field name or {@code null} if not set
     */
    public String getSummaryCountFieldName() {
        return summaryCountFieldName;
    }

    /**
     * The list of analysis detectors. In a valid configuration the list should
     * contain at least 1 {@link Detector}
     *
     * @return The Detectors used in this job, as an unmodifiable list
     */
    public List<Detector> getDetectors() {
        return detectors;
    }

    /**
     * The list of influence field names
     *
     * @return An unmodifiable list, never {@code null}
     */
    public List<String> getInfluencers() {
        return influencers;
    }

    /**
     * @return The multivariate-by-fields flag or {@code null} if not set
     */
    public Boolean getMultivariateByFields() {
        return multivariateByFields;
    }

    /**
     * @return The model prune window or {@code null} if not set
     */
    public TimeValue getModelPruneWindow() {
        return modelPruneWindow;
    }

    /** Adds {@code field} to {@code fields} unless it is {@code null}. */
    private static void addIfNotNull(Set<String> fields, String field) {
        if (field != null) {
            fields.add(field);
        }
    }

    /**
     * @return The distinct, non-null, non-empty values of {@link Detector#getFieldName()}
     *         across all detectors, in no particular order
     */
    public List<String> fields() {
        return collectNonNullAndNonEmptyDetectorFields(Detector::getFieldName);
    }

    /**
     * Applies {@code fieldGetter} to every detector and gathers the results, dropping
     * {@code null}s, empty strings and duplicates.
     *
     * @param fieldGetter extracts one field name from a detector
     * @return a new mutable list; element order is unspecified because values are collected in a {@link HashSet}
     */
    private List<String> collectNonNullAndNonEmptyDetectorFields(Function<Detector, String> fieldGetter) {
        Set<String> fields = new HashSet<>();
        for (Detector d : getDetectors()) {
            addIfNotNull(fields, fieldGetter.apply(d));
        }
        // remove empty strings
        fields.remove("");
        return new ArrayList<>(fields);
    }

    /**
     * @return The distinct, non-null, non-empty values of {@link Detector#getByFieldName()}
     *         across all detectors, in no particular order
     */
    public List<String> byFields() {
        return collectNonNullAndNonEmptyDetectorFields(Detector::getByFieldName);
    }

    /**
     * @return The distinct, non-null, non-empty values of {@link Detector#getOverFieldName()}
     *         across all detectors, in no particular order
     */
    public List<String> overFields() {
        return collectNonNullAndNonEmptyDetectorFields(Detector::getOverFieldName);
    }

    /**
     * @return The distinct, non-null, non-empty values of {@link Detector#getPartitionFieldName()}
     *         across all detectors, in no particular order
     */
    public List<String> partitionFields() {
        return collectNonNullAndNonEmptyDetectorFields(Detector::getPartitionFieldName);
    }

    /**
     * Writes this configuration as a single object. Fields that may be {@code null} are
     * written only when set; the detectors array and the influencers field are always written.
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        if (bucketSpan != null) {
            builder.field(BUCKET_SPAN.getPreferredName(), bucketSpan.getStringRep());
        }
        if (categorizationFieldName != null) {
            builder.field(CATEGORIZATION_FIELD_NAME.getPreferredName(), categorizationFieldName);
        }
        if (categorizationFilters != null) {
            builder.field(CATEGORIZATION_FILTERS.getPreferredName(), categorizationFilters);
        }
        if (categorizationAnalyzerConfig != null) {
            // This cannot be builder.field(CATEGORIZATION_ANALYZER.getPreferredName(), categorizationAnalyzerConfig, params);
            // because that always writes categorizationAnalyzerConfig as an object, and in the case of a global analyzer it
            // gets written as a single string.
            categorizationAnalyzerConfig.toXContent(builder, params);
        }
        if (perPartitionCategorizationConfig != null) {
            builder.field(PER_PARTITION_CATEGORIZATION.getPreferredName(), perPartitionCategorizationConfig);
        }
        if (latency != null) {
            builder.field(LATENCY.getPreferredName(), latency.getStringRep());
        }
        if (summaryCountFieldName != null) {
            builder.field(SUMMARY_COUNT_FIELD_NAME.getPreferredName(), summaryCountFieldName);
        }
        builder.startArray(DETECTORS.getPreferredName());
        for (Detector detector : detectors) {
            detector.toXContent(builder, params);
        }
        builder.endArray();
        builder.field(INFLUENCERS.getPreferredName(), influencers);
        if (multivariateByFields != null) {
            builder.field(MULTIVARIATE_BY_FIELDS.getPreferredName(), multivariateByFields);
        }
        if (modelPruneWindow != null) {
            builder.field(MODEL_PRUNE_WINDOW.getPreferredName(), modelPruneWindow.getStringRep());
        }
        builder.endObject();
        return builder;
    }

    /** Two configurations are equal when they have the same class and all eleven fields are equal. */
    @Override
    public boolean equals(Object object) {
        if (this == object) {
            return true;
        }
        if (object == null || getClass() != object.getClass()) {
            return false;
        }
        AnalysisConfig that = (AnalysisConfig) object;
        return Objects.equals(latency, that.latency)
            && Objects.equals(bucketSpan, that.bucketSpan)
            && Objects.equals(categorizationFieldName, that.categorizationFieldName)
            && Objects.equals(categorizationFilters, that.categorizationFilters)
            && Objects.equals(categorizationAnalyzerConfig, that.categorizationAnalyzerConfig)
            && Objects.equals(perPartitionCategorizationConfig, that.perPartitionCategorizationConfig)
            && Objects.equals(summaryCountFieldName, that.summaryCountFieldName)
            && Objects.equals(detectors, that.detectors)
            && Objects.equals(influencers, that.influencers)
            && Objects.equals(multivariateByFields, that.multivariateByFields)
            && Objects.equals(modelPruneWindow, that.modelPruneWindow);
    }

    /** Hash over the same eleven fields compared by {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return Objects.hash(
            bucketSpan,
            categorizationFieldName,
            categorizationFilters,
            categorizationAnalyzerConfig,
            perPartitionCategorizationConfig,
            latency,
            summaryCountFieldName,
            detectors,
            influencers,
            multivariateByFields,
            modelPruneWindow
        );
    }

    /**
     * Convenience factory equivalent to {@code new Builder(detectors)}.
     *
     * @param detectors the detectors for the job; must not be {@code null}
     * @return a new builder
     */
    public static Builder builder(List<Detector> detectors) {
        return new Builder(detectors);
    }

    /**
     * Mutable builder for {@link AnalysisConfig}. Every setter returns {@code this} so calls can be chained.
     */
    public static class Builder {

        private List<Detector> detectors;
        private TimeValue bucketSpan;
        private TimeValue latency;
        private String categorizationFieldName;
        private List<String> categorizationFilters;
        private CategorizationAnalyzerConfig categorizationAnalyzerConfig;
        private PerPartitionCategorizationConfig perPartitionCategorizationConfig;
        private String summaryCountFieldName;
        private List<String> influencers = new ArrayList<>();
        private Boolean multivariateByFields;
        private TimeValue modelPruneWindow;

        /**
         * Creates a builder holding the given detectors; see {@link #setDetectors(List)} for how they are stored.
         *
         * @param detectors the detectors; must not be {@code null}
         * @throws NullPointerException if {@code detectors} is {@code null}
         */
        public Builder(List<Detector> detectors) {
            setDetectors(detectors);
        }

        /**
         * Creates a builder pre-populated from an existing configuration. The lists are copied
         * into new mutable lists; the remaining values are shared by reference.
         *
         * @param analysisConfig the configuration to copy from
         */
        public Builder(AnalysisConfig analysisConfig) {
            this.detectors = new ArrayList<>(analysisConfig.detectors);
            this.bucketSpan = analysisConfig.bucketSpan;
            this.latency = analysisConfig.latency;
            this.categorizationFieldName = analysisConfig.categorizationFieldName;
            this.categorizationFilters = analysisConfig.categorizationFilters == null
                ? null
                : new ArrayList<>(analysisConfig.categorizationFilters);
            this.categorizationAnalyzerConfig = analysisConfig.categorizationAnalyzerConfig;
            this.perPartitionCategorizationConfig = analysisConfig.perPartitionCategorizationConfig;
            this.summaryCountFieldName = analysisConfig.summaryCountFieldName;
            this.influencers = new ArrayList<>(analysisConfig.influencers);
            this.multivariateByFields = analysisConfig.multivariateByFields;
            this.modelPruneWindow = analysisConfig.modelPruneWindow;
        }

        /**
         * Replaces all detectors. Each supplied detector is rebuilt with its detector index
         * set to its position in the list (0, 1, 2, ...), and the rebuilt detectors are stored
         * in a new list, so the argument list is not retained.
         *
         * @param detectors the detectors; must not be {@code null}
         * @return this builder
         * @throws NullPointerException if {@code detectors} is {@code null}
         */
        public Builder setDetectors(List<Detector> detectors) {
            Objects.requireNonNull(detectors, "[" + DETECTORS.getPreferredName() + "] must not be null");
            // We always assign sequential IDs to the detectors that are correct for this analysis config
            int detectorIndex = 0;
            List<Detector> sequentialIndexDetectors = new ArrayList<>(detectors.size());
            for (Detector origDetector : detectors) {
                Detector.Builder builder = new Detector.Builder(origDetector);
                builder.setDetectorIndex(detectorIndex++);
                sequentialIndexDetectors.add(builder.build());
            }
            this.detectors = sequentialIndexDetectors;
            return this;
        }

        /**
         * Replaces the detector at the given position. The detector is stored exactly as
         * given: unlike {@link #setDetectors(List)}, its own detector index is not rewritten.
         *
         * @param detectorIndex position in the current detector list
         * @param detector the replacement detector
         * @return this builder
         * @throws IndexOutOfBoundsException if {@code detectorIndex} is outside the current list
         */
        public Builder setDetector(int detectorIndex, Detector detector) {
            detectors.set(detectorIndex, detector);
            return this;
        }

        /**
         * @param bucketSpan the bucket span, or {@code null} to leave it unset
         * @return this builder
         */
        public Builder setBucketSpan(TimeValue bucketSpan) {
            this.bucketSpan = bucketSpan;
            return this;
        }

        /**
         * @param latency the latency interval, or {@code null} to leave it unset
         * @return this builder
         */
        public Builder setLatency(TimeValue latency) {
            this.latency = latency;
            return this;
        }

        /**
         * @param categorizationFieldName the categorization field name, or {@code null} to leave it unset
         * @return this builder
         */
        public Builder setCategorizationFieldName(String categorizationFieldName) {
            this.categorizationFieldName = categorizationFieldName;
            return this;
        }

        /**
         * @param categorizationFilters the categorization filters, or {@code null} to leave them unset
         * @return this builder
         */
        public Builder setCategorizationFilters(List<String> categorizationFilters) {
            this.categorizationFilters = categorizationFilters;
            return this;
        }

        /**
         * @param categorizationAnalyzerConfig the analyzer configuration, or {@code null} to leave it unset
         * @return this builder
         */
        public Builder setCategorizationAnalyzerConfig(CategorizationAnalyzerConfig categorizationAnalyzerConfig) {
            this.categorizationAnalyzerConfig = categorizationAnalyzerConfig;
            return this;
        }

        /**
         * @param perPartitionCategorizationConfig the per-partition categorization configuration,
         *        or {@code null} to leave it unset
         * @return this builder
         */
        public Builder setPerPartitionCategorizationConfig(PerPartitionCategorizationConfig perPartitionCategorizationConfig) {
            this.perPartitionCategorizationConfig = perPartitionCategorizationConfig;
            return this;
        }

        /**
         * @param summaryCountFieldName the summary count field name, or {@code null} to leave it unset
         * @return this builder
         */
        public Builder setSummaryCountFieldName(String summaryCountFieldName) {
            this.summaryCountFieldName = summaryCountFieldName;
            return this;
        }

        /**
         * @param influencers the influencer field names; must not be {@code null}
         * @return this builder
         * @throws NullPointerException if {@code influencers} is {@code null}
         */
        public Builder setInfluencers(List<String> influencers) {
            this.influencers = Objects.requireNonNull(influencers, INFLUENCERS.getPreferredName());
            return this;
        }

        /**
         * @param multivariateByFields the flag value, or {@code null} to leave it unset
         * @return this builder
         */
        public Builder setMultivariateByFields(Boolean multivariateByFields) {
            this.multivariateByFields = multivariateByFields;
            return this;
        }

        /**
         * @param modelPruneWindow the model prune window, or {@code null} to leave it unset
         * @return this builder
         */
        public Builder setModelPruneWindow(TimeValue modelPruneWindow) {
            this.modelPruneWindow = modelPruneWindow;
            return this;
        }

        /**
         * Builds the configuration from the current builder state. No validation is performed here.
         *
         * @return a new {@link AnalysisConfig} that does not share list state with this builder
         */
        public AnalysisConfig build() {
            return new AnalysisConfig(
                bucketSpan,
                categorizationFieldName,
                categorizationFilters,
                categorizationAnalyzerConfig,
                perPartitionCategorizationConfig,
                latency,
                summaryCountFieldName,
                detectors,
                influencers,
                multivariateByFields,
                modelPruneWindow
            );
        }
    }
}