/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/

package org.opensearch.search.aggregations.pipeline;

import org.opensearch.common.collect.EvictingQueue;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.InternalAggregation;
import org.opensearch.search.aggregations.InternalAggregation.ReduceContext;
import org.opensearch.search.aggregations.InternalAggregations;
import org.opensearch.search.aggregations.InternalMultiBucketAggregation;
import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation;
import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation.Bucket;
import org.opensearch.search.aggregations.bucket.histogram.HistogramFactory;
import org.opensearch.search.aggregations.pipeline.BucketHelpers.GapPolicy;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import static org.opensearch.search.aggregations.pipeline.BucketHelpers.resolveBucketValue;

/**
* Pipeline aggregator that computes a moving average over the values of a
* sibling metric within each bucket of a multi-bucket aggregation (typically a
* histogram), optionally predicting values for buckets beyond the end of the
* series.
*
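* <p>For illustration only, a request exercising this aggregator might look
* like the following (field and aggregation names are hypothetical):
* <pre>
* "aggs": {
*   "the_sum": { "sum": { "field": "price" } },
*   "the_movavg": {
*     "moving_avg": { "buckets_path": "the_sum", "window": 5, "model": "simple" }
*   }
* }
* </pre>
*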
* @opensearch.internal
*/
public class MovAvgPipelineAggregator extends PipelineAggregator {
private final DocValueFormat formatter;
private final GapPolicy gapPolicy;
private final int window;
private MovAvgModel model;
private final int predict;
private final boolean minimize;

MovAvgPipelineAggregator(
String name,
String[] bucketsPaths,
DocValueFormat formatter,
GapPolicy gapPolicy,
int window,
int predict,
MovAvgModel model,
boolean minimize,
Map<String, Object> metadata
) {
super(name, bucketsPaths, metadata);
this.formatter = formatter;
this.gapPolicy = gapPolicy;
this.window = window;
this.model = model;
this.predict = predict;
this.minimize = minimize;
}

/**
* Read from a stream.
*/
public MovAvgPipelineAggregator(StreamInput in) throws IOException {
super(in);
formatter = in.readNamedWriteable(DocValueFormat.class);
gapPolicy = GapPolicy.readFrom(in);
window = in.readVInt();
predict = in.readVInt();
model = in.readNamedWriteable(MovAvgModel.class);
minimize = in.readBoolean();
}

@Override
public void doWriteTo(StreamOutput out) throws IOException {
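// Write fields in the same order the stream constructor above reads them.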
out.writeNamedWriteable(formatter);
gapPolicy.writeTo(out);
out.writeVInt(window);
out.writeVInt(predict);
out.writeNamedWriteable(model);
out.writeBoolean(minimize);
}

@Override
public String getWriteableName() {
return MovAvgPipelineAggregationBuilder.NAME;
}

@Override
public InternalAggregation reduce(InternalAggregation aggregation, ReduceContext reduceContext) {
InternalMultiBucketAggregation<
? extends InternalMultiBucketAggregation,
? extends InternalMultiBucketAggregation.InternalBucket> histo = (InternalMultiBucketAggregation<
? extends InternalMultiBucketAggregation,
? extends InternalMultiBucketAggregation.InternalBucket>) aggregation;
List<? extends InternalMultiBucketAggregation.InternalBucket> buckets = histo.getBuckets();
HistogramFactory factory = (HistogramFactory) histo;
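
// The factory view of the histogram is used to mint replacement and predicted buckets.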
List<Bucket> newBuckets = new ArrayList<>();
EvictingQueue<Double> values = new EvictingQueue<>(this.window);
Number lastValidKey = 0;
int lastValidPosition = 0;
int counter = 0;
// Do we need to fit the model parameters to the data?
if (minimize) {
assert (model.canBeMinimized());
model = minimize(buckets, histo, model);
}
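// Walk the buckets in order, feeding each non-gap value into the sliding window
// and emitting the model's moving average alongside the bucket's existing aggs.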
for (InternalMultiBucketAggregation.InternalBucket bucket : buckets) {
Double thisBucketValue = resolveBucketValue(histo, bucket, bucketsPaths()[0], gapPolicy);
// Default is to reuse existing bucket. Simplifies the rest of the logic,
// since we only change newBucket if we can add to it
Bucket newBucket = bucket;
if ((thisBucketValue == null || thisBucketValue.equals(Double.NaN)) == false) {
// Some models (e.g. HoltWinters) have certain preconditions that must be met
if (model.hasValue(values.size())) {
double movavg = model.next(values);
List<InternalAggregation> aggs = StreamSupport.stream(bucket.getAggregations().spliterator(), false)
.map((p) -> (InternalAggregation) p)
.collect(Collectors.toList());
aggs.add(new InternalSimpleValue(name(), movavg, formatter, metadata()));
newBucket = factory.createBucket(factory.getKey(bucket), bucket.getDocCount(), InternalAggregations.from(aggs));
}
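// Track the key and position of the last bucket holding a real value; any
// predictions are appended starting immediately after it.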
if (predict > 0) {
lastValidKey = factory.getKey(bucket);
lastValidPosition = counter;
}
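// The current value joins the window only after the estimate is emitted, so a
// bucket's moving average is computed from the buckets that precede it.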
values.offer(thisBucketValue);
}
counter += 1;
newBuckets.add(newBucket);
}
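// Emit the requested predictions: overwrite trailing buckets that already exist
// (for example, empty histogram buckets) and append fresh ones past the end.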
if (buckets.size() > 0 && predict > 0) {
double[] predictions = model.predict(values, predict);
for (int i = 0; i < predictions.length; i++) {
List<InternalAggregation> aggs;
Number newKey = factory.nextKey(lastValidKey);
if (lastValidPosition + i + 1 < newBuckets.size()) {
Bucket bucket = newBuckets.get(lastValidPosition + i + 1);
// Get the existing aggs in the bucket so we don't clobber data
aggs = StreamSupport.stream(bucket.getAggregations().spliterator(), false)
.map((p) -> (InternalAggregation) p)
.collect(Collectors.toList());
aggs.add(new InternalSimpleValue(name(), predictions[i], formatter, metadata()));
Bucket newBucket = factory.createBucket(newKey, bucket.getDocCount(), InternalAggregations.from(aggs));
// Overwrite the existing bucket with the new version
newBuckets.set(lastValidPosition + i + 1, newBucket);
} else {
// Not seen before, create fresh
aggs = new ArrayList<>();
aggs.add(new InternalSimpleValue(name(), predictions[i], formatter, metadata()));
Bucket newBucket = factory.createBucket(newKey, 0, InternalAggregations.from(aggs));
// Since this is a new bucket, simply append it
newBuckets.add(newBucket);
}
lastValidKey = newKey;
}
}
return factory.createAggregation(newBuckets);
}

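/**
* Fit the model's parameters to the data. The last {@code window} values form a
* test set and the {@code window} values before them form a training set; the
* simulated annealing minimizer then searches for the coefficients that best
* reproduce the test set when predicting from the training set.
*/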
private MovAvgModel minimize(
List<? extends InternalMultiBucketAggregation.InternalBucket> buckets,
MultiBucketsAggregation histo,
MovAvgModel model
) {
int counter = 0;
EvictingQueue<Double> values = new EvictingQueue<>(this.window);
double[] test = new double[window];
ListIterator<? extends InternalMultiBucketAggregation.InternalBucket> iter = buckets.listIterator(buckets.size());
// We have to walk the iterator backwards because we don't know if/how many buckets are empty.
while (iter.hasPrevious() && counter < window) {
Double thisBucketValue = resolveBucketValue(histo, iter.previous(), bucketsPaths()[0], gapPolicy);
if (!(thisBucketValue == null || thisBucketValue.equals(Double.NaN))) {
test[window - counter - 1] = thisBucketValue;
counter += 1;
}
}
// If we didn't fill the test set, we don't have enough data to minimize.
// Just return the model with the starting coef
if (counter < window) {
return model;
}
// And do it again, for the train set. Unfortunately we have to fill an array and then
// fill an evicting queue backwards :(
counter = 0;
double[] train = new double[window];
while (iter.hasPrevious() && counter < window) {
Double thisBucketValue = resolveBucketValue(histo, iter.previous(), bucketsPaths()[0], gapPolicy);
if (!(thisBucketValue == null || thisBucketValue.equals(Double.NaN))) {
train[window - counter - 1] = thisBucketValue;
counter += 1;
}
}
// If we didn't fill the train set, we don't have enough data to minimize.
// Just return the model with the starting coef
if (counter < window) {
return model;
}
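// Replay the training window into the evicting queue in chronological order
// before handing it to the minimizer.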
for (double v : train) {
values.add(v);
}
return SimulatedAnealingMinimizer.minimize(model, values, test);
}
}