Please wait. This can take a few minutes...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest Maven / Gradle / Ivy
Go to download
A module that is everything required to understand Druid Segments
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.timeseries;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.inject.Inject;
import org.apache.commons.lang.StringUtils;
import org.apache.druid.data.input.MapBasedRow;
import org.apache.druid.frame.Frame;
import org.apache.druid.frame.FrameType;
import org.apache.druid.frame.allocation.MemoryAllocatorFactory;
import org.apache.druid.frame.segment.FrameCursorUtils;
import org.apache.druid.frame.write.FrameWriterFactory;
import org.apache.druid.frame.write.FrameWriterUtils;
import org.apache.druid.frame.write.FrameWriters;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.query.CacheStrategy;
import org.apache.druid.query.FrameSignaturePair;
import org.apache.druid.query.IterableRowsCursorHelper;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryPlus;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.QueryToolChest;
import org.apache.druid.query.Result;
import org.apache.druid.query.ResultGranularTimestampComparator;
import org.apache.druid.query.ResultMergeQueryRunner;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.MetricManipulationFn;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.query.context.ResponseContext;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.RowAdapters;
import org.apache.druid.segment.RowBasedColumnSelectorFactory;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.joda.time.DateTime;
import java.io.Closeable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.BinaryOperator;
/**
*
*/
public class TimeseriesQueryQueryToolChest extends QueryToolChest, TimeseriesQuery>
{
private static final byte TIMESERIES_QUERY = 0x0;
private static final TypeReference OBJECT_TYPE_REFERENCE =
new TypeReference()
{
};
private static final TypeReference> TYPE_REFERENCE =
new TypeReference>()
{
};
private final TimeseriesQueryMetricsFactory queryMetricsFactory;
@VisibleForTesting
public TimeseriesQueryQueryToolChest()
{
this(DefaultTimeseriesQueryMetricsFactory.instance());
}
@Inject
public TimeseriesQueryQueryToolChest(TimeseriesQueryMetricsFactory queryMetricsFactory)
{
this.queryMetricsFactory = queryMetricsFactory;
}
@Override
public QueryRunner> mergeResults(
QueryRunner> queryRunner
)
{
final QueryRunner> resultMergeQueryRunner = new ResultMergeQueryRunner>(
queryRunner,
this::createResultComparator,
this::createMergeFn
)
{
@Override
public Sequence> doRun(
QueryRunner> baseRunner,
QueryPlus> queryPlus,
ResponseContext context
)
{
int limit = ((TimeseriesQuery) queryPlus.getQuery()).getLimit();
Sequence> result = super.doRun(
baseRunner,
// Don't do post aggs until makePostComputeManipulatorFn() is called
queryPlus.withQuery(((TimeseriesQuery) queryPlus.getQuery()).withPostAggregatorSpecs(ImmutableList.of())),
context
);
if (limit < Integer.MAX_VALUE) {
return result.limit(limit);
}
return result;
}
};
return (queryPlus, responseContext) -> {
final TimeseriesQuery query = (TimeseriesQuery) queryPlus.getQuery();
final Sequence> baseResults = resultMergeQueryRunner.run(
queryPlus.withQuery(
queryPlus.getQuery()
.withOverriddenContext(
ImmutableMap.of(TimeseriesQuery.CTX_GRAND_TOTAL, false)
)
),
responseContext
);
final Sequence> finalSequence;
// When granularity = ALL, there is no grouping key for this query.
// To be more sql-compliant, we should return something (e.g., 0 for count queries) even when
// the sequence is empty.
if (query.getGranularity().equals(Granularities.ALL) &&
// Returns empty sequence if this query allows skipping empty buckets
!query.isSkipEmptyBuckets() &&
// Returns empty sequence if bySegment is set because bySegment results are mostly used for
// caching in historicals or debugging where the exact results are preferred.
!query.context().isBySegment()) {
// Usally it is NOT Okay to materialize results via toList(), but Granularity is ALL thus
// we have only one record.
final List> val = baseResults.toList();
finalSequence = val.isEmpty() ? Sequences.simple(Collections.singletonList(
getNullTimeseriesResultValue(query))) : Sequences.simple(val);
} else {
finalSequence = baseResults;
}
if (query.isGrandTotal()) {
// Accumulate grand totals while iterating the sequence.
final Object[] grandTotals = new Object[query.getAggregatorSpecs().size()];
final Sequence> mappedSequence = Sequences.map(
finalSequence,
resultValue -> {
for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
final AggregatorFactory aggregatorFactory = query.getAggregatorSpecs().get(i);
final Object value = resultValue.getValue().getMetric(aggregatorFactory.getName());
if (grandTotals[i] == null) {
grandTotals[i] = value;
} else {
grandTotals[i] = aggregatorFactory.combine(grandTotals[i], value);
}
}
return resultValue;
}
);
return Sequences.concat(
ImmutableList.of(
mappedSequence,
Sequences.simple(
() -> {
final Map totalsMap = new HashMap<>();
for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
totalsMap.put(query.getAggregatorSpecs().get(i).getName(), grandTotals[i]);
}
final Result result = new Result<>(
null,
new TimeseriesResultValue(totalsMap)
);
return Collections.singletonList(result).iterator();
}
)
)
);
} else {
return finalSequence;
}
};
}
@Override
public BinaryOperator> createMergeFn(
Query> query
)
{
TimeseriesQuery timeseriesQuery = (TimeseriesQuery) query;
return new TimeseriesBinaryFn(timeseriesQuery.getGranularity(), timeseriesQuery.getAggregatorSpecs());
}
@Override
public Comparator> createResultComparator(Query> query)
{
return ResultGranularTimestampComparator.create(query.getGranularity(), query.isDescending());
}
private Result getNullTimeseriesResultValue(TimeseriesQuery query)
{
List aggregatorSpecs = query.getAggregatorSpecs();
Aggregator[] aggregators = new Aggregator[aggregatorSpecs.size()];
String[] aggregatorNames = new String[aggregatorSpecs.size()];
RowSignature aggregatorsSignature =
RowSignature.builder().addAggregators(aggregatorSpecs, RowSignature.Finalization.UNKNOWN).build();
for (int i = 0; i < aggregatorSpecs.size(); i++) {
aggregators[i] =
aggregatorSpecs.get(i)
.factorize(
RowBasedColumnSelectorFactory.create(
RowAdapters.standardRow(),
() -> new MapBasedRow(null, null),
aggregatorsSignature,
false,
false
)
);
aggregatorNames[i] = aggregatorSpecs.get(i).getName();
}
final DateTime start = query.getIntervals().isEmpty() ? DateTimes.EPOCH : query.getIntervals().get(0).getStart();
TimeseriesResultBuilder bob = new TimeseriesResultBuilder(start);
for (int i = 0; i < aggregatorSpecs.size(); i++) {
bob.addMetric(aggregatorNames[i], aggregators[i].get());
aggregators[i].close();
}
return bob.build();
}
@Override
public TimeseriesQueryMetrics makeMetrics(TimeseriesQuery query)
{
TimeseriesQueryMetrics queryMetrics = queryMetricsFactory.makeMetrics();
queryMetrics.query(query);
return queryMetrics;
}
@Override
public TypeReference> getResultTypeReference()
{
return TYPE_REFERENCE;
}
@Override
public CacheStrategy, Object, TimeseriesQuery> getCacheStrategy(final TimeseriesQuery query)
{
return new CacheStrategy, Object, TimeseriesQuery>()
{
private final List aggs = query.getAggregatorSpecs();
@Override
public boolean isCacheable(TimeseriesQuery query, boolean willMergeRunners, boolean bySegment)
{
return true;
}
@Override
public byte[] computeCacheKey(TimeseriesQuery query)
{
return new CacheKeyBuilder(TIMESERIES_QUERY)
.appendBoolean(query.isDescending())
.appendBoolean(query.isSkipEmptyBuckets())
.appendCacheable(query.getGranularity())
.appendCacheable(query.getDimensionsFilter())
.appendCacheables(query.getAggregatorSpecs())
.appendCacheable(query.getVirtualColumns())
.appendInt(query.getLimit())
.build();
}
@Override
public byte[] computeResultLevelCacheKey(TimeseriesQuery query)
{
final CacheKeyBuilder builder = new CacheKeyBuilder(TIMESERIES_QUERY)
.appendBoolean(query.isDescending())
.appendBoolean(query.isSkipEmptyBuckets())
.appendCacheable(query.getGranularity())
.appendCacheable(query.getDimensionsFilter())
.appendCacheables(query.getAggregatorSpecs())
.appendCacheable(query.getVirtualColumns())
.appendCacheables(query.getPostAggregatorSpecs())
.appendInt(query.getLimit())
.appendString(query.getTimestampResultField())
.appendBoolean(query.isGrandTotal());
return builder.build();
}
@Override
public TypeReference getCacheObjectClazz()
{
return OBJECT_TYPE_REFERENCE;
}
@Override
public Function, Object> prepareForCache(boolean isResultLevelCache)
{
return input -> {
TimeseriesResultValue results = input.getValue();
final List retVal = Lists.newArrayListWithCapacity(1 + aggs.size());
// Timestamp can be null if grandTotal is true.
if (isResultLevelCache) {
retVal.add(input.getTimestamp() == null ? null : input.getTimestamp().getMillis());
} else {
retVal.add(Preconditions.checkNotNull(input.getTimestamp(), "timestamp of input[%s]", input).getMillis());
}
for (AggregatorFactory agg : aggs) {
retVal.add(results.getMetric(agg.getName()));
}
if (isResultLevelCache) {
for (PostAggregator postAgg : query.getPostAggregatorSpecs()) {
retVal.add(results.getMetric(postAgg.getName()));
}
}
return retVal;
};
}
@Override
public Function> pullFromCache(boolean isResultLevelCache)
{
return new Function>()
{
private final Granularity granularity = query.getGranularity();
@Override
public Result apply(Object input)
{
List results = (List) input;
final Map retVal = Maps.newLinkedHashMap();
Iterator resultIter = results.iterator();
final Number timestampNumber = (Number) resultIter.next();
final DateTime timestamp;
if (isResultLevelCache) {
timestamp = timestampNumber == null ? null : granularity.toDateTime(timestampNumber.longValue());
} else {
timestamp = granularity.toDateTime(Preconditions.checkNotNull(timestampNumber, "timestamp").longValue());
}
CacheStrategy.fetchAggregatorsFromCache(
aggs,
resultIter,
isResultLevelCache,
(aggName, aggPosition, aggValueObject) -> {
retVal.put(aggName, aggValueObject);
}
);
if (isResultLevelCache) {
Iterator postItr = query.getPostAggregatorSpecs().iterator();
while (postItr.hasNext() && resultIter.hasNext()) {
retVal.put(postItr.next().getName(), resultIter.next());
}
}
return new Result<>(
timestamp,
new TimeseriesResultValue(retVal)
);
}
};
}
};
}
@Override
public QueryRunner> preMergeQueryDecoration(final QueryRunner> runner)
{
return (queryPlus, responseContext) -> {
return runner.run(queryPlus, responseContext);
};
}
@Override
public Function, Result> makePreComputeManipulatorFn(
final TimeseriesQuery query,
final MetricManipulationFn fn
)
{
return makeComputeManipulatorFn(query, fn, false);
}
@Override
public Function, Result> makePostComputeManipulatorFn(
TimeseriesQuery query,
MetricManipulationFn fn
)
{
return makeComputeManipulatorFn(query, fn, true);
}
@Override
public RowSignature resultArraySignature(TimeseriesQuery query)
{
RowSignature.Builder rowSignatureBuilder = RowSignature.builder();
rowSignatureBuilder.addTimeColumn();
if (StringUtils.isNotEmpty(query.getTimestampResultField())) {
rowSignatureBuilder.add(query.getTimestampResultField(), ColumnType.LONG);
}
rowSignatureBuilder.addAggregators(query.getAggregatorSpecs(), RowSignature.Finalization.UNKNOWN);
rowSignatureBuilder.addPostAggregators(query.getPostAggregatorSpecs());
return rowSignatureBuilder.build();
}
@Override
public Sequence resultsAsArrays(
final TimeseriesQuery query,
final Sequence> resultSequence
)
{
final List fields = resultArraySignature(query).getColumnNames();
return Sequences.map(
resultSequence,
result -> {
final Object[] retVal = new Object[fields.size()];
// Position 0 is always __time.
retVal[0] = result.getTimestamp().getMillis();
// Add other fields.
final Map resultMap = result.getValue().getBaseObject();
for (int i = 1; i < fields.size(); i++) {
retVal[i] = resultMap.get(fields.get(i));
}
return retVal;
}
);
}
/**
* This returns a single frame containing the results of the timeseries query
*/
@Override
public Optional> resultsAsFrames(
TimeseriesQuery query,
Sequence> resultSequence,
MemoryAllocatorFactory memoryAllocatorFactory,
boolean useNestedForUnknownTypes
)
{
final RowSignature rowSignature = resultArraySignature(query);
final Pair cursorAndCloseable = IterableRowsCursorHelper.getCursorFromSequence(
resultsAsArrays(query, resultSequence),
rowSignature
);
final Cursor cursor = cursorAndCloseable.lhs;
final Closeable closeable = cursorAndCloseable.rhs;
RowSignature modifiedRowSignature = useNestedForUnknownTypes
? FrameWriterUtils.replaceUnknownTypesWithNestedColumns(rowSignature)
: rowSignature;
FrameWriterFactory frameWriterFactory = FrameWriters.makeFrameWriterFactory(
FrameType.COLUMNAR,
memoryAllocatorFactory,
modifiedRowSignature,
new ArrayList<>()
);
Sequence frames = FrameCursorUtils.cursorToFrames(cursor, frameWriterFactory).withBaggage(closeable);
// All frames are generated with the same signature therefore we can attach the row signature
return Optional.of(frames.map(frame -> new FrameSignaturePair(frame, modifiedRowSignature)));
}
private Function, Result> makeComputeManipulatorFn(
final TimeseriesQuery query,
final MetricManipulationFn fn,
final boolean calculatePostAggs
)
{
return result -> {
final TimeseriesResultValue holder = result.getValue();
final Map values = new HashMap<>(holder.getBaseObject());
if (calculatePostAggs) {
// If "timestampResultField" is set, we must include a copy of the timestamp in the result.
// This is used by the SQL layer when it generates a Timeseries query for a group-by-time-floor SQL query.
// The SQL layer expects the result of the time-floor to have a specific name that is not going to be "__time".
// This should be done before computing post aggregators since they can reference "timestampResultField".
if (StringUtils.isNotEmpty(query.getTimestampResultField()) && result.getTimestamp() != null) {
final DateTime timestamp = result.getTimestamp();
values.put(query.getTimestampResultField(), timestamp.getMillis());
}
if (!query.getPostAggregatorSpecs().isEmpty()) {
// put non finalized aggregators for calculating dependent post Aggregators
for (AggregatorFactory agg : query.getAggregatorSpecs()) {
values.put(agg.getName(), holder.getMetric(agg.getName()));
}
for (PostAggregator postAgg : query.getPostAggregatorSpecs()) {
values.put(postAgg.getName(), postAgg.compute(values));
}
}
}
for (AggregatorFactory agg : query.getAggregatorSpecs()) {
values.put(agg.getName(), fn.manipulate(agg, holder.getMetric(agg.getName())));
}
return new Result<>(
result.getTimestamp(),
new TimeseriesResultValue(values)
);
};
}
}