org.elasticsearch.search.aggregations.pipeline.movavg.MovAvgPipelineAggregator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.pipeline.movavg;
import org.elasticsearch.common.collect.EvictingQueue;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregation.ReduceContext;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation.Bucket;
import org.elasticsearch.search.aggregations.bucket.histogram.HistogramFactory;
import org.elasticsearch.search.aggregations.pipeline.BucketHelpers.GapPolicy;
import org.elasticsearch.search.aggregations.pipeline.InternalSimpleValue;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.pipeline.movavg.models.MovAvgModel;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import static org.elasticsearch.search.aggregations.pipeline.BucketHelpers.resolveBucketValue;
public class MovAvgPipelineAggregator extends PipelineAggregator {
private final DocValueFormat formatter;
private final GapPolicy gapPolicy;
private final int window;
private MovAvgModel model;
private final int predict;
private final boolean minimize;
public MovAvgPipelineAggregator(String name, String[] bucketsPaths, DocValueFormat formatter, GapPolicy gapPolicy,
int window, int predict, MovAvgModel model, boolean minimize, Map metadata) {
super(name, bucketsPaths, metadata);
this.formatter = formatter;
this.gapPolicy = gapPolicy;
this.window = window;
this.model = model;
this.predict = predict;
this.minimize = minimize;
}
/**
* Read from a stream.
*/
public MovAvgPipelineAggregator(StreamInput in) throws IOException {
super(in);
formatter = in.readNamedWriteable(DocValueFormat.class);
gapPolicy = GapPolicy.readFrom(in);
window = in.readVInt();
predict = in.readVInt();
model = in.readNamedWriteable(MovAvgModel.class);
minimize = in.readBoolean();
}
@Override
public void doWriteTo(StreamOutput out) throws IOException {
out.writeNamedWriteable(formatter);
gapPolicy.writeTo(out);
out.writeVInt(window);
out.writeVInt(predict);
out.writeNamedWriteable(model);
out.writeBoolean(minimize);
}
@Override
public String getWriteableName() {
return MovAvgPipelineAggregationBuilder.NAME;
}
@Override
public InternalAggregation reduce(InternalAggregation aggregation, ReduceContext reduceContext) {
MultiBucketsAggregation histo = (MultiBucketsAggregation) aggregation;
List buckets = histo.getBuckets();
HistogramFactory factory = (HistogramFactory) histo;
List newBuckets = new ArrayList<>();
EvictingQueue values = new EvictingQueue<>(this.window);
Number lastValidKey = 0;
int lastValidPosition = 0;
int counter = 0;
// Do we need to fit the model parameters to the data?
if (minimize) {
assert (model.canBeMinimized());
model = minimize(buckets, histo, model);
}
for (Bucket bucket : buckets) {
Double thisBucketValue = resolveBucketValue(histo, bucket, bucketsPaths()[0], gapPolicy);
// Default is to reuse existing bucket. Simplifies the rest of the logic,
// since we only change newBucket if we can add to it
Bucket newBucket = bucket;
if (!(thisBucketValue == null || thisBucketValue.equals(Double.NaN))) {
// Some models (e.g. HoltWinters) have certain preconditions that must be met
if (model.hasValue(values.size())) {
double movavg = model.next(values);
List aggs = StreamSupport.stream(bucket.getAggregations().spliterator(), false).map((p) -> {
return (InternalAggregation) p;
}).collect(Collectors.toList());
aggs.add(new InternalSimpleValue(name(), movavg, formatter, new ArrayList(), metaData()));
newBucket = factory.createBucket(factory.getKey(bucket), bucket.getDocCount(), new InternalAggregations(aggs));
}
if (predict > 0) {
lastValidKey = factory.getKey(bucket);
lastValidPosition = counter;
}
values.offer(thisBucketValue);
}
counter += 1;
newBuckets.add(newBucket);
}
if (buckets.size() > 0 && predict > 0) {
double[] predictions = model.predict(values, predict);
for (int i = 0; i < predictions.length; i++) {
List aggs;
Number newKey = factory.nextKey(lastValidKey);
if (lastValidPosition + i + 1 < newBuckets.size()) {
Bucket bucket = newBuckets.get(lastValidPosition + i + 1);
// Get the existing aggs in the bucket so we don't clobber data
aggs = StreamSupport.stream(bucket.getAggregations().spliterator(), false).map((p) -> {
return (InternalAggregation) p;
}).collect(Collectors.toList());
aggs.add(new InternalSimpleValue(name(), predictions[i], formatter, new ArrayList(), metaData()));
Bucket newBucket = factory.createBucket(newKey, 0, new InternalAggregations(aggs));
// Overwrite the existing bucket with the new version
newBuckets.set(lastValidPosition + i + 1, newBucket);
} else {
// Not seen before, create fresh
aggs = new ArrayList<>();
aggs.add(new InternalSimpleValue(name(), predictions[i], formatter, new ArrayList(), metaData()));
Bucket newBucket = factory.createBucket(newKey, 0, new InternalAggregations(aggs));
// Since this is a new bucket, simply append it
newBuckets.add(newBucket);
}
lastValidKey = newKey;
}
}
return factory.createAggregation(newBuckets);
}
private MovAvgModel minimize(List buckets, MultiBucketsAggregation histo, MovAvgModel model) {
int counter = 0;
EvictingQueue values = new EvictingQueue<>(this.window);
double[] test = new double[window];
ListIterator iter = buckets.listIterator(buckets.size());
// We have to walk the iterator backwards because we don't know if/how many buckets are empty.
while (iter.hasPrevious() && counter < window) {
Double thisBucketValue = resolveBucketValue(histo, iter.previous(), bucketsPaths()[0], gapPolicy);
if (!(thisBucketValue == null || thisBucketValue.equals(Double.NaN))) {
test[window - counter - 1] = thisBucketValue;
counter += 1;
}
}
// If we didn't fill the test set, we don't have enough data to minimize.
// Just return the model with the starting coef
if (counter < window) {
return model;
}
//And do it again, for the train set. Unfortunately we have to fill an array and then
//fill an evicting queue backwards :(
counter = 0;
double[] train = new double[window];
while (iter.hasPrevious() && counter < window) {
Double thisBucketValue = resolveBucketValue(histo, iter.previous(), bucketsPaths()[0], gapPolicy);
if (!(thisBucketValue == null || thisBucketValue.equals(Double.NaN))) {
train[window - counter - 1] = thisBucketValue;
counter += 1;
}
}
// If we didn't fill the train set, we don't have enough data to minimize.
// Just return the model with the starting coef
if (counter < window) {
return model;
}
for (double v : train) {
values.add(v);
}
return SimulatedAnealingMinimizer.minimize(model, values, test);
}
}