org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.AucRoc Maven / Gradle / Ivy
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection;
import org.apache.lucene.util.SetOnce;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.PipelineAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.EvaluationFields;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.EvaluationMetricResult;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.EvaluationParameters;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.common.AbstractAucRoc;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.IntStream;
import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.MlEvaluationNamedXContentProvider.registeredMetricName;
import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.OutlierDetection.actualIsTrueQuery;
/**
* Area under the curve (AUC) of the receiver operating characteristic (ROC).
* The ROC curve is a plot of the TPR (true positive rate) against
* the FPR (false positive rate) over a varying threshold.
*
* This particular implementation is making use of ES aggregations
* to calculate the curve. It then uses the trapezoidal rule to calculate
* the AUC.
*
* In particular, in order to calculate the ROC, we get percentiles of TP
* and FP against the predicted probability. We call those Rate-Threshold
* curves. We then scan ROC points from each Rate-Threshold curve against the
* other using interpolation. This gives us an approximation of the ROC curve
* that has the advantage of being efficient and resilient to some edge cases.
*
* When this is used for multi-class classification, it will calculate the ROC
* curve of each class versus the rest.
*/
public class AucRoc extends AbstractAucRoc {
public static final ParseField INCLUDE_CURVE = new ParseField("include_curve");
public static final ConstructingObjectParser PARSER =
new ConstructingObjectParser<>(NAME.getPreferredName(), a -> new AucRoc((Boolean) a[0]));
static {
PARSER.declareBoolean(ConstructingObjectParser.optionalConstructorArg(), INCLUDE_CURVE);
}
private static final String TRUE_AGG_NAME = NAME.getPreferredName() + "_true";
private static final String NON_TRUE_AGG_NAME = NAME.getPreferredName() + "_non_true";
private static final String PERCENTILES_AGG_NAME = "percentiles";
public static AucRoc fromXContent(XContentParser parser) {
return PARSER.apply(parser, null);
}
private final boolean includeCurve;
private final SetOnce fields = new SetOnce<>();
private final SetOnce result = new SetOnce<>();
public AucRoc(Boolean includeCurve) {
this.includeCurve = includeCurve == null ? false : includeCurve;
}
public AucRoc(StreamInput in) throws IOException {
this.includeCurve = in.readBoolean();
}
@Override
public String getWriteableName() {
return registeredMetricName(OutlierDetection.NAME, NAME);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeBoolean(includeCurve);
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(INCLUDE_CURVE.getPreferredName(), includeCurve);
builder.endObject();
return builder;
}
@Override
public Set getRequiredFields() {
return Sets.newHashSet(
EvaluationFields.ACTUAL_FIELD.getPreferredName(), EvaluationFields.PREDICTED_PROBABILITY_FIELD.getPreferredName());
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
AucRoc that = (AucRoc) o;
return includeCurve == that.includeCurve;
}
@Override
public int hashCode() {
return Objects.hash(includeCurve);
}
@Override
public Tuple, List> aggs(EvaluationParameters parameters,
EvaluationFields fields) {
if (result.get() != null) {
return Tuple.tuple(Collections.emptyList(), Collections.emptyList());
}
// Store given {@code fields} for the purpose of generating error messages in {@code process}.
this.fields.trySet(fields);
String actualField = fields.getActualField();
String predictedProbabilityField = fields.getPredictedProbabilityField();
double[] percentiles = IntStream.range(1, 100).mapToDouble(v -> (double) v).toArray();
AggregationBuilder percentilesAgg =
AggregationBuilders
.percentiles(PERCENTILES_AGG_NAME)
.field(predictedProbabilityField)
.percentiles(percentiles);
AggregationBuilder percentilesForClassValueAgg =
AggregationBuilders
.filter(TRUE_AGG_NAME, actualIsTrueQuery(actualField))
.subAggregation(percentilesAgg);
AggregationBuilder percentilesForRestAgg =
AggregationBuilders
.filter(NON_TRUE_AGG_NAME, QueryBuilders.boolQuery().mustNot(actualIsTrueQuery(actualField)))
.subAggregation(percentilesAgg);
return Tuple.tuple(
Arrays.asList(percentilesForClassValueAgg, percentilesForRestAgg),
Collections.emptyList());
}
@Override
public void process(Aggregations aggs) {
if (result.get() != null) {
return;
}
Filter classAgg = aggs.get(TRUE_AGG_NAME);
if (classAgg.getDocCount() == 0) {
throw ExceptionsHelper.badRequestException(
"[{}] requires at least one [{}] to have the value [{}]", getName(), fields.get().getActualField(), "true");
}
double[] tpPercentiles = percentilesArray(classAgg.getAggregations().get(PERCENTILES_AGG_NAME));
Filter restAgg = aggs.get(NON_TRUE_AGG_NAME);
if (restAgg.getDocCount() == 0) {
throw ExceptionsHelper.badRequestException(
"[{}] requires at least one [{}] to have a different value than [{}]", getName(), fields.get().getActualField(), "true");
}
double[] fpPercentiles = percentilesArray(restAgg.getAggregations().get(PERCENTILES_AGG_NAME));
List aucRocCurve = buildAucRocCurve(tpPercentiles, fpPercentiles);
double aucRocScore = calculateAucScore(aucRocCurve);
result.set(new Result(aucRocScore, includeCurve ? aucRocCurve : Collections.emptyList()));
}
@Override
public Optional getResult() {
return Optional.ofNullable(result.get());
}
}