/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.movingaverage;
import com.google.common.base.Function;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.MapBasedRow;
import org.apache.druid.data.input.Row;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.QueryPlus;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.QuerySegmentWalker;
import org.apache.druid.query.Result;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.UnionDataSource;
import org.apache.druid.query.context.ResponseContext;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.movingaverage.averagers.AveragerFactory;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.timeseries.TimeseriesQuery;
import org.apache.druid.query.timeseries.TimeseriesResultValue;
import org.apache.druid.server.QueryStats;
import org.apache.druid.server.RequestLogLine;
import org.apache.druid.server.log.RequestLogger;
import org.joda.time.Interval;
import org.joda.time.Period;
import javax.annotation.Nullable;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
/**
* The QueryRunner for MovingAverage query.
* High level flow:
* 1. Invokes an inner groupBy query (or a timeseries query in the no-dimensions case) to compute the aggregations/post-aggregations.
* 2. The result is passed to {@link RowBucketIterable}, which groups rows of all dimension combinations into period-based (e.g. daily) buckets of rows ({@link RowBucket}).
* 3. The sequence is passed to {@link MovingAverageIterable}, which performs the main part of the query: computing the averagers and adding them to the records.
* 4. Finishes up by applying post averagers, removing rows outside the requested intervals, and applying post phases (having, sorting, limits).
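* <p>
* For illustration, a movingAverage query might look roughly like the sketch below (the
* datasource, column names, and averager settings are hypothetical, not taken from this class):
* <pre>
* {
*   "queryType": "movingAverage",
*   "dataSource": "wikipedia",
*   "granularity": {"type": "period", "period": "P1D"},
*   "intervals": ["2020-01-08/2020-01-15"],
*   "aggregations": [{"type": "longSum", "name": "edits", "fieldName": "count"}],
*   "averagers": [{"type": "doubleMean", "name": "edits7DayAvg", "fieldName": "edits", "buckets": 7}]
* }
* </pre>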
*/
public class MovingAverageQueryRunner implements QueryRunner<Row>
{
private final QuerySegmentWalker walker;
private final RequestLogger requestLogger;
public MovingAverageQueryRunner(
@Nullable QuerySegmentWalker walker,
RequestLogger requestLogger
)
{
this.walker = walker;
this.requestLogger = requestLogger;
}
@Override
public Sequence<Row> run(QueryPlus<Row> query, ResponseContext responseContext)
{
MovingAverageQuery maq = (MovingAverageQuery) query.getQuery();
List<Interval> intervals;
final Period period;
// Get the largest bucket from the list of averagers
Optional<Integer> opt =
maq.getAveragerSpecs().stream().map(AveragerFactory::getNumBuckets).max(Integer::compare);
int buckets = opt.orElse(0);
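// e.g., a single 7-bucket averager yields buckets = 7; with no averagers at all, buckets = 0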
// Extend the interval beginning by (largest bucket count - 1) periods
if (maq.getGranularity() instanceof PeriodGranularity) {
period = ((PeriodGranularity) maq.getGranularity()).getPeriod();
int offset = buckets <= 0 ? 0 : (1 - buckets);
intervals = maq.getIntervals()
.stream()
.map(i -> new Interval(i.getStart().withPeriodAdded(period, offset), i.getEnd()))
.collect(Collectors.toList());
} else {
throw new ISE("Only PeriodGranulaity is supported for movingAverage queries");
}
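// Worked example (hypothetical values): with P1D granularity and buckets = 7, offset = -6,
// so a requested interval of 2020-01-08/2020-01-15 is widened to 2020-01-02/2020-01-15,
// supplying the six extra look-back days the averagers need.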
Sequence<Row> resultsSeq;
DataSource dataSource = maq.getDataSource();
if (maq.getDimensions() != null && !maq.getDimensions().isEmpty() &&
(dataSource instanceof TableDataSource || dataSource instanceof UnionDataSource ||
dataSource instanceof QueryDataSource)) {
// build groupBy query from movingAverage query
GroupByQuery.Builder builder = GroupByQuery.builder()
.setDataSource(dataSource)
.setInterval(intervals)
.setDimFilter(maq.getFilter())
.setGranularity(maq.getGranularity())
.setDimensions(maq.getDimensions())
.setAggregatorSpecs(maq.getAggregatorSpecs())
.setPostAggregatorSpecs(maq.getPostAggregatorSpecs())
.setContext(maq.getContext());
GroupByQuery gbq = builder.build();
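// The inner groupBy carries everything from the movingAverage query except the averagers,
// which this runner applies afterwards over the widened intervals.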
ResponseContext gbqResponseContext = ResponseContext.createEmpty();
gbqResponseContext.merge(responseContext);
gbqResponseContext.putQueryFailDeadlineMs(
System.currentTimeMillis() + gbq.context().getTimeout()
);
Sequence<ResultRow> results = gbq.getRunner(walker).run(QueryPlus.wrap(gbq), gbqResponseContext);
try {
// use localhost for remote address
requestLogger.logNativeQuery(RequestLogLine.forNative(
gbq,
DateTimes.nowUtc(),
"127.0.0.1",
new QueryStats(
ImmutableMap.of(
"query/time", 0,
"query/bytes", 0,
"success", true
))
));
}
catch (Exception e) {
Throwables.throwIfUnchecked(e);
throw new RuntimeException(e);
}
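// Convert each groupBy ResultRow into a MapBasedRow so both branches produce the same Row type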
resultsSeq = results.map(row -> row.toMapBasedRow(gbq));
} else {
// no dimensions, so optimize this as a TimeSeries
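// Positional args below: descending = false, virtualColumns = null, limit = 0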
TimeseriesQuery tsq = new TimeseriesQuery(
dataSource,
new MultipleIntervalSegmentSpec(intervals),
false,
null,
maq.getFilter(),
maq.getGranularity(),
maq.getAggregatorSpecs(),
maq.getPostAggregatorSpecs(),
0,
maq.getContext()
);
ResponseContext tsqResponseContext = ResponseContext.createEmpty();
tsqResponseContext.merge(responseContext);
tsqResponseContext.putQueryFailDeadlineMs(
System.currentTimeMillis() + tsq.context().getTimeout()
);
Sequence<Result<TimeseriesResultValue>> results = tsq.getRunner(walker).run(QueryPlus.wrap(tsq), tsqResponseContext);
try {
// use localhost for remote address
requestLogger.logNativeQuery(RequestLogLine.forNative(
tsq,
DateTimes.nowUtc(),
"127.0.0.1",
new QueryStats(
ImmutableMap.of(
"query/time", 0,
"query/bytes", 0,
"success", true
))
));
}
catch (Exception e) {
Throwables.throwIfUnchecked(e);
throw new RuntimeException(e);
}
resultsSeq = Sequences.map(results, new TimeseriesResultToRow());
}
// Process into period buckets
Sequence<RowBucket> bucketedMovingAvgResults =
Sequences.simple(new RowBucketIterable(resultsSeq, intervals, period));
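// e.g., with P1D granularity, each RowBucket holds every dimension combination's rows for one day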
// Apply the window analysis functions
Sequence<Row> movingAvgResults = Sequences.simple(
new MovingAverageIterable(
bucketedMovingAvgResults,
maq.getDimensions(),
maq.getAveragerSpecs(),
maq.getPostAggregatorSpecs(),
maq.getAggregatorSpecs()
)
);
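// Each averager computes over a sliding window of its last getNumBuckets() buckets,
// maintained separately per dimension combination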
// Apply any postAveragers
Sequence<Row> movingAvgResultsWithPostAveragers =
Sequences.map(movingAvgResults, new PostAveragerAggregatorCalculator(maq));
// remove rows outside the reporting window
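// (i.e., drop the extra look-back rows that exist only because the intervals were widened above)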
List<Interval> reportingIntervals = maq.getIntervals();
movingAvgResults =
Sequences.filter(
movingAvgResultsWithPostAveragers,
row -> reportingIntervals.stream().anyMatch(i -> i.contains(row.getTimestamp()))
);
// Apply any having, sorting, and limits
movingAvgResults = maq.applyLimit(movingAvgResults);
return movingAvgResults;
}
static class TimeseriesResultToRow implements Function<Result<TimeseriesResultValue>, Row>
{
@Override
public Row apply(Result<TimeseriesResultValue> lookbackResult)
{
Map<String, Object> event = lookbackResult.getValue().getBaseObject();
return new MapBasedRow(lookbackResult.getTimestamp(), event);
}
}
}
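// Usage sketch (hypothetical; the actual wiring of this runner lives elsewhere in the extension):
//   QueryRunner<Row> runner = new MovingAverageQueryRunner(walker, requestLogger);
//   Sequence<Row> rows = runner.run(QueryPlus.wrap(movingAverageQuery), ResponseContext.createEmpty());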