/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.client;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.common.collect.RangeSet;
import com.google.common.collect.Sets;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import com.google.common.primitives.Bytes;
import com.google.inject.Inject;
import org.apache.druid.client.cache.Cache;
import org.apache.druid.client.cache.CacheConfig;
import org.apache.druid.client.cache.CachePopulator;
import org.apache.druid.client.selector.QueryableDruidServer;
import org.apache.druid.client.selector.ServerSelector;
import org.apache.druid.guice.annotations.Client;
import org.apache.druid.guice.annotations.Merging;
import org.apache.druid.guice.annotations.Smile;
import org.apache.druid.guice.http.DruidHttpClientConfig;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.guava.BaseSequence;
import org.apache.druid.java.util.common.guava.LazySequence;
import org.apache.druid.java.util.common.guava.ParallelMergeCombiningSequence;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.query.BrokerParallelMergeConfig;
import org.apache.druid.query.BySegmentResultValueClass;
import org.apache.druid.query.CacheStrategy;
import org.apache.druid.query.Queries;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryContext;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.QueryMetrics;
import org.apache.druid.query.QueryPlus;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.QuerySegmentWalker;
import org.apache.druid.query.QueryToolChest;
import org.apache.druid.query.QueryToolChestWarehouse;
import org.apache.druid.query.Result;
import org.apache.druid.query.SegmentDescriptor;
import org.apache.druid.query.aggregation.MetricManipulatorFns;
import org.apache.druid.query.context.ResponseContext;
import org.apache.druid.query.filter.DimFilterUtils;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.query.spec.QuerySegmentSpec;
import org.apache.druid.server.QueryResource;
import org.apache.druid.server.QueryScheduler;
import org.apache.druid.server.coordination.DruidServerMetadata;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.Overshadowable;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.timeline.TimelineLookup;
import org.apache.druid.timeline.TimelineObjectHolder;
import org.apache.druid.timeline.VersionedIntervalTimeline;
import org.apache.druid.timeline.VersionedIntervalTimeline.PartitionChunkEntry;
import org.apache.druid.timeline.partition.PartitionChunk;
import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ForkJoinPool;
import java.util.function.BinaryOperator;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;
/**
* This is the class on the Broker that is responsible for making native Druid queries to a cluster of data servers.
*
* The main user of this class is {@link org.apache.druid.server.ClientQuerySegmentWalker}. In tests, its behavior
* is partially mimicked by TestClusterQuerySegmentWalker.
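*
* As a rough usage sketch (the caller-side names below are illustrative, not part of this class), a caller obtains
* a runner for the query's intervals and executes it:
* <pre>{@code
* QueryRunner<T> runner = cachingClusteredClient.getQueryRunnerForIntervals(query, query.getIntervals());
* Sequence<T> results = runner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());
* }</pre>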
*/
public class CachingClusteredClient implements QuerySegmentWalker
{
private static final EmittingLogger log = new EmittingLogger(CachingClusteredClient.class);
private final QueryToolChestWarehouse warehouse;
private final TimelineServerView serverView;
private final Cache cache;
private final ObjectMapper objectMapper;
private final CachePopulator cachePopulator;
private final CacheConfig cacheConfig;
private final DruidHttpClientConfig httpClientConfig;
private final BrokerParallelMergeConfig parallelMergeConfig;
private final ForkJoinPool pool;
private final QueryScheduler scheduler;
private final ServiceEmitter emitter;
@Inject
public CachingClusteredClient(
QueryToolChestWarehouse warehouse,
TimelineServerView serverView,
Cache cache,
@Smile ObjectMapper objectMapper,
CachePopulator cachePopulator,
CacheConfig cacheConfig,
@Client DruidHttpClientConfig httpClientConfig,
BrokerParallelMergeConfig parallelMergeConfig,
@Merging ForkJoinPool pool,
QueryScheduler scheduler,
ServiceEmitter emitter
)
{
this.warehouse = warehouse;
this.serverView = serverView;
this.cache = cache;
this.objectMapper = objectMapper;
this.cachePopulator = cachePopulator;
this.cacheConfig = cacheConfig;
this.httpClientConfig = httpClientConfig;
this.parallelMergeConfig = parallelMergeConfig;
this.pool = pool;
this.scheduler = scheduler;
this.emitter = emitter;
if (cacheConfig.isQueryCacheable(Query.GROUP_BY) && (cacheConfig.isUseCache() || cacheConfig.isPopulateCache())) {
log.warn(
"Even though groupBy caching is enabled in your configuration, v2 groupBys will not be cached on the broker. "
+ "Consider enabling caching on your data nodes if it is not already enabled."
);
}
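// Evict this client's cached per-segment results when a segment is dropped from the cluster, so stale entries
// are not served for a segment that no longer exists.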
serverView.registerSegmentCallback(
Execs.singleThreaded("CCClient-ServerView-CB-%d"),
new ServerView.BaseSegmentCallback()
{
@Override
public ServerView.CallbackAction segmentRemoved(DruidServerMetadata server, DataSegment segment)
{
CachingClusteredClient.this.cache.close(segment.getId().toString());
return ServerView.CallbackAction.CONTINUE;
}
}
);
}
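/**
 * Returns a runner that resolves the given intervals against the full timeline for the query's datasource;
 * the actual work happens lazily in {@link #run} when the returned runner is executed.
 */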
@Override
public <T> QueryRunner<T> getQueryRunnerForIntervals(final Query<T> query, final Iterable<Interval> intervals)
{
return new QueryRunner<T>()
{
@Override
public Sequence<T> run(final QueryPlus<T> queryPlus, final ResponseContext responseContext)
{
return CachingClusteredClient.this.run(queryPlus, responseContext, timeline -> timeline, false);
}
};
}
/**
* Run a query. The timelineConverter will be given the "master" timeline and can be used to return a different
* timeline, if desired. This is used by getQueryRunnerForSegments.
*/
private <T> Sequence<T> run(
final QueryPlus<T> queryPlus,
final ResponseContext responseContext,
final UnaryOperator<TimelineLookup<String, ServerSelector>> timelineConverter,
final boolean specificSegments
)
{
final ClusterQueryResult<T> result = new SpecificQueryRunnable<>(queryPlus, responseContext)
.run(timelineConverter, specificSegments);
initializeNumRemainingResponsesInResponseContext(queryPlus.getQuery(), responseContext, result.numQueryServers);
return result.sequence;
}
private static void initializeNumRemainingResponsesInResponseContext(
final Query<?> query,
final ResponseContext responseContext,
final int numQueryServers
)
{
responseContext.addRemainingResponse(query.getMostSpecificId(), numQueryServers);
}
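/**
 * Returns a runner restricted to the given segment descriptors. The descriptors are applied to the timeline via
 * {@link TimelineConverter}, and incomplete partitions are allowed (specificSegments = true).
 */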
@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(final Query<T> query, final Iterable<SegmentDescriptor> specs)
{
return new QueryRunner<T>()
{
@Override
public Sequence<T> run(final QueryPlus<T> queryPlus, final ResponseContext responseContext)
{
return CachingClusteredClient.this.run(
queryPlus,
responseContext,
new TimelineConverter<>(specs),
true
);
}
};
}
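/**
 * Simple holder for the merged result sequence and the number of servers participating in the query.
 */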
private static class ClusterQueryResult<T>
{
private final Sequence<T> sequence;
private final int numQueryServers;
private ClusterQueryResult(Sequence<T> sequence, int numQueryServers)
{
this.sequence = sequence;
this.numQueryServers = numQueryServers;
}
}
/**
* This class essentially encapsulates the major part of the logic of {@link CachingClusteredClient}. Its state and
* methods couldn't belong to {@link CachingClusteredClient} itself, because they depend on the specific query object
* being run, but the {@link QuerySegmentWalker} API is designed so that implementations should be able to accept
* arbitrary queries.
*/
private class SpecificQueryRunnable<T>
{
private final ResponseContext responseContext;
private QueryPlus<T> queryPlus;
private Query<T> query;
private final QueryToolChest<T, Query<T>> toolChest;
@Nullable
private final CacheStrategy<T, Object, Query<T>> strategy;
private final boolean useCache;
private final boolean populateCache;
private final boolean isBySegment;
private final int uncoveredIntervalsLimit;
private final Map<String, Cache.NamedKey> cachePopulatorKeyMap = new HashMap<>();
private final DataSourceAnalysis dataSourceAnalysis;
private final List<Interval> intervals;
private final CacheKeyManager<T> cacheKeyManager;
SpecificQueryRunnable(final QueryPlus<T> queryPlus, final ResponseContext responseContext)
{
this.queryPlus = queryPlus;
this.responseContext = responseContext;
this.query = queryPlus.getQuery();
this.toolChest = warehouse.getToolChest(query);
this.strategy = toolChest.getCacheStrategy(query, objectMapper);
this.dataSourceAnalysis = query.getDataSource().getAnalysis();
this.useCache = CacheUtil.isUseSegmentCache(query, strategy, cacheConfig, CacheUtil.ServerType.BROKER);
this.populateCache = CacheUtil.isPopulateSegmentCache(query, strategy, cacheConfig, CacheUtil.ServerType.BROKER);
final QueryContext queryContext = query.context();
this.isBySegment = queryContext.isBySegment();
// Note that enabling this leads to putting uncovered intervals information in the response headers
// and might blow up in some cases https://github.com/apache/druid/issues/2108
this.uncoveredIntervalsLimit = queryContext.getUncoveredIntervalsLimit();
// For nested queries, we need to look at the intervals of the innermost query.
this.intervals = dataSourceAnalysis.getBaseQuerySegmentSpec()
.map(QuerySegmentSpec::getIntervals)
.orElseGet(() -> query.getIntervals());
this.cacheKeyManager = new CacheKeyManager<>(
query,
strategy,
useCache,
populateCache
);
}
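/**
 * Builds the query context overrides sent to data servers: priority and lane are propagated, and if the Broker is
 * populating the cache itself, downstream caching is turned off and bySegment results are requested instead.
 */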
private ImmutableMap<String, Object> makeDownstreamQueryContext()
{
final ImmutableMap.Builder<String, Object> contextBuilder = new ImmutableMap.Builder<>();
final QueryContext queryContext = query.context();
final int priority = queryContext.getPriority();
contextBuilder.put(QueryContexts.PRIORITY_KEY, priority);
final String lane = queryContext.getLane();
if (lane != null) {
contextBuilder.put(QueryContexts.LANE_KEY, lane);
}
if (populateCache) {
// prevent down-stream nodes from caching results as well if we are populating the cache
contextBuilder.put(CacheConfig.POPULATE_CACHE, false);
contextBuilder.put(QueryContexts.BY_SEGMENT_KEY, true);
}
return contextBuilder.build();
}
/**
* Builds a query distribution and merge plan.
*
* This method returns an empty sequence if the query datasource is unknown or there is a matching result-level cache entry.
* Otherwise, it creates a sequence merging sequences from the regular broker cache and remote servers. If parallel
* merge is enabled, it can merge and *combine* the underlying sequences in parallel.
*
* @return a pair of a sequence merging results from remote query servers and the number of remote servers
* participating in query processing.
*/
ClusterQueryResult<T> run(
final UnaryOperator<TimelineLookup<String, ServerSelector>> timelineConverter,
final boolean specificSegments
)
{
final Optional<? extends TimelineLookup<String, ServerSelector>> maybeTimeline = serverView.getTimeline(
dataSourceAnalysis
);
if (!maybeTimeline.isPresent()) {
return new ClusterQueryResult<>(Sequences.empty(), 0);
}
final TimelineLookup<String, ServerSelector> timeline = timelineConverter.apply(maybeTimeline.get());
if (uncoveredIntervalsLimit > 0) {
computeUncoveredIntervals(timeline);
}
final Set<SegmentServerSelector> segmentServers = computeSegmentsToQuery(timeline, specificSegments);
@Nullable
final byte[] queryCacheKey = cacheKeyManager.computeSegmentLevelQueryCacheKey();
@Nullable
final String prevEtag = (String) query.getContext().get(QueryResource.HEADER_IF_NONE_MATCH);
if (prevEtag != null) {
@Nullable
final String currentEtag = cacheKeyManager.computeResultLevelCachingEtag(segmentServers, queryCacheKey);
if (null != currentEtag) {
responseContext.putEntityTag(currentEtag);
}
if (currentEtag != null && currentEtag.equals(prevEtag)) {
return new ClusterQueryResult<>(Sequences.empty(), 0);
}
}
final List<Pair<Interval, byte[]>> alreadyCachedResults =
pruneSegmentsWithCachedResults(queryCacheKey, segmentServers);
query = scheduler.prioritizeAndLaneQuery(queryPlus, segmentServers);
queryPlus = queryPlus.withQuery(query);
queryPlus = queryPlus.withQueryMetrics(toolChest);
queryPlus.getQueryMetrics().reportQueriedSegmentCount(segmentServers.size()).emit(emitter);
final SortedMap<DruidServer, List<SegmentDescriptor>> segmentsByServer = groupSegmentsByServer(segmentServers);
LazySequence<T> mergedResultSequence = new LazySequence<>(() -> {
List<Sequence<T>> sequencesByInterval = new ArrayList<>(alreadyCachedResults.size() + segmentsByServer.size());
addSequencesFromCache(sequencesByInterval, alreadyCachedResults);
addSequencesFromServer(sequencesByInterval, segmentsByServer);
return merge(sequencesByInterval);
});
return new ClusterQueryResult<>(scheduler.run(query, mergedResultSequence), segmentsByServer.size());
}
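/**
 * Merges the per-interval sequences: uses the parallel merge-combining pool when it is enabled and the toolChest
 * supplies a merge function, otherwise falls back to a flat merge ordered by the query's result ordering.
 */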
private Sequence<T> merge(List<Sequence<T>> sequencesByInterval)
{
BinaryOperator<T> mergeFn = toolChest.createMergeFn(query);
final QueryContext queryContext = query.context();
if (parallelMergeConfig.useParallelMergePool() && queryContext.getEnableParallelMerges() && mergeFn != null) {
final ParallelMergeCombiningSequence<T> parallelSequence = new ParallelMergeCombiningSequence<>(
pool,
sequencesByInterval,
query.getResultOrdering(),
mergeFn,
queryContext.hasTimeout(),
queryContext.getTimeout(),
queryContext.getPriority(),
queryContext.getParallelMergeParallelism(parallelMergeConfig.getDefaultMaxQueryParallelism()),
queryContext.getParallelMergeInitialYieldRows(parallelMergeConfig.getInitialYieldNumRows()),
queryContext.getParallelMergeSmallBatchRows(parallelMergeConfig.getSmallBatchNumRows()),
parallelMergeConfig.getTargetRunTimeMillis(),
reportMetrics -> {
QueryMetrics<?> queryMetrics = queryPlus.getQueryMetrics();
if (queryMetrics != null) {
queryMetrics.parallelMergeParallelism(reportMetrics.getParallelism());
queryMetrics.reportParallelMergeParallelism(reportMetrics.getParallelism()).emit(emitter);
queryMetrics.reportParallelMergeInputSequences(reportMetrics.getInputSequences()).emit(emitter);
queryMetrics.reportParallelMergeInputRows(reportMetrics.getInputRows()).emit(emitter);
queryMetrics.reportParallelMergeOutputRows(reportMetrics.getOutputRows()).emit(emitter);
queryMetrics.reportParallelMergeTaskCount(reportMetrics.getTaskCount()).emit(emitter);
queryMetrics.reportParallelMergeTotalCpuTime(reportMetrics.getTotalCpuTime()).emit(emitter);
queryMetrics.reportParallelMergeTotalTime(reportMetrics.getTotalTime()).emit(emitter);
queryMetrics.reportParallelMergeSlowestPartitionTime(reportMetrics.getSlowestPartitionInitializedTime())
.emit(emitter);
queryMetrics.reportParallelMergeFastestPartitionTime(reportMetrics.getFastestPartitionInitializedTime())
.emit(emitter);
}
}
);
scheduler.registerQueryFuture(query, parallelSequence.getCancellationFuture());
return parallelSequence;
} else {
return Sequences
.simple(sequencesByInterval)
.flatMerge(seq -> seq, query.getResultOrdering());
}
}
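/**
 * Looks up the query intervals in the timeline, lets the toolChest filter the resulting holders, and optionally
 * prunes partition chunks by secondary partition dimensions, returning the (server selector, segment descriptor)
 * pairs that need to be queried.
 */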
private Set<SegmentServerSelector> computeSegmentsToQuery(
TimelineLookup<String, ServerSelector> timeline,
boolean specificSegments
)
{
final java.util.function.Function<Interval, List<TimelineObjectHolder<String, ServerSelector>>> lookupFn
= specificSegments ? timeline::lookupWithIncompletePartitions : timeline::lookup;
List<TimelineObjectHolder<String, ServerSelector>> timelineObjectHolders =
intervals.stream().flatMap(i -> lookupFn.apply(i).stream()).collect(Collectors.toList());
final List<TimelineObjectHolder<String, ServerSelector>> serversLookup = toolChest.filterSegments(
query,
timelineObjectHolders
);
final Set<SegmentServerSelector> segments = new LinkedHashSet<>();
final Map<String, Optional<RangeSet<String>>> dimensionRangeCache;
final Set<String> filterFieldsForPruning;
final boolean trySecondaryPartitionPruning =
query.getFilter() != null && query.context().isSecondaryPartitionPruningEnabled();
if (trySecondaryPartitionPruning) {
dimensionRangeCache = new HashMap<>();
filterFieldsForPruning =
DimFilterUtils.onlyBaseFields(query.getFilter().getRequiredColumns(), dataSourceAnalysis);
} else {
dimensionRangeCache = null;
filterFieldsForPruning = null;
}
// Filter unneeded chunks based on partition dimension
for (TimelineObjectHolder<String, ServerSelector> holder : serversLookup) {
final Set<PartitionChunk<ServerSelector>> filteredChunks;
if (trySecondaryPartitionPruning) {
filteredChunks = DimFilterUtils.filterShards(
query.getFilter(),
filterFieldsForPruning,
holder.getObject(),
partitionChunk -> partitionChunk.getObject().getSegment().getShardSpec(),
dimensionRangeCache
);
} else {
filteredChunks = Sets.newLinkedHashSet(holder.getObject());
}
for (PartitionChunk<ServerSelector> chunk : filteredChunks) {
ServerSelector server = chunk.getObject();
final SegmentDescriptor segment = new SegmentDescriptor(
holder.getInterval(),
holder.getVersion(),
chunk.getChunkNumber()
);
segments.add(new SegmentServerSelector(server, segment));
}
}
return segments;
}
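/**
 * Walks the timeline for each query interval and records, up to {@link #uncoveredIntervalsLimit}, the
 * sub-intervals covered by no loaded segment; the result is reported through the response context.
 */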
private void computeUncoveredIntervals(TimelineLookup<String, ServerSelector> timeline)
{
final List<Interval> uncoveredIntervals = new ArrayList<>(uncoveredIntervalsLimit);
boolean uncoveredIntervalsOverflowed = false;
for (Interval interval : intervals) {
Iterable<TimelineObjectHolder<String, ServerSelector>> lookup = timeline.lookup(interval);
long startMillis = interval.getStartMillis();
long endMillis = interval.getEndMillis();
for (TimelineObjectHolder<String, ServerSelector> holder : lookup) {
Interval holderInterval = holder.getInterval();
long intervalStart = holderInterval.getStartMillis();
if (!uncoveredIntervalsOverflowed && startMillis != intervalStart) {
if (uncoveredIntervalsLimit > uncoveredIntervals.size()) {
uncoveredIntervals.add(Intervals.utc(startMillis, intervalStart));
} else {
uncoveredIntervalsOverflowed = true;
}
}
startMillis = holderInterval.getEndMillis();
}
if (!uncoveredIntervalsOverflowed && startMillis < endMillis) {
if (uncoveredIntervalsLimit > uncoveredIntervals.size()) {
uncoveredIntervals.add(Intervals.utc(startMillis, endMillis));
} else {
uncoveredIntervalsOverflowed = true;
}
}
}
if (!uncoveredIntervals.isEmpty()) {
// Record in the response context the intervals for which NO segment is present.
// This is not necessarily an indication that the data doesn't exist or is
// incomplete; the data could exist and simply not be loaded yet. In either
// case, though, this query will not include any data from the identified intervals.
responseContext.putUncoveredIntervals(uncoveredIntervals, uncoveredIntervalsOverflowed);
}
}
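/**
 * Checks the segment-level cache for each segment to be queried. Hits are removed from the set of segments to
 * query and returned as (interval, cached bytes) pairs; misses are registered for cache population when
 * populateCache is enabled.
 */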
private List<Pair<Interval, byte[]>> pruneSegmentsWithCachedResults(
final byte[] queryCacheKey,
final Set<SegmentServerSelector> segments
)
{
if (queryCacheKey == null) {
return Collections.emptyList();
}
final List<Pair<Interval, byte[]>> alreadyCachedResults = new ArrayList<>();
Map<SegmentServerSelector, Cache.NamedKey> perSegmentCacheKeys = computePerSegmentCacheKeys(
segments,
queryCacheKey
);
// Pull cached segments from cache and remove from set of segments to query
final Map<Cache.NamedKey, byte[]> cachedValues = computeCachedValues(perSegmentCacheKeys);
perSegmentCacheKeys.forEach((segment, segmentCacheKey) -> {
final Interval segmentQueryInterval = segment.getSegmentDescriptor().getInterval();
final byte[] cachedValue = cachedValues.get(segmentCacheKey);
if (cachedValue != null) {
// remove cached segment from set of segments to query
segments.remove(segment);
alreadyCachedResults.add(Pair.of(segmentQueryInterval, cachedValue));
} else if (populateCache) {
// otherwise, if populating cache, add segment to list of segments to cache
final SegmentId segmentId = segment.getServer().getSegment().getId();
addCachePopulatorKey(segmentCacheKey, segmentId, segmentQueryInterval);
}
});
return alreadyCachedResults;
}
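/**
 * Computes the segment-level cache key for every segment, keyed by its server selector.
 */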
private Map<SegmentServerSelector, Cache.NamedKey> computePerSegmentCacheKeys(
Set<SegmentServerSelector> segments,
byte[] queryCacheKey
)
{
// cacheKeys map must preserve segment ordering, in order for shards to always be combined in the same order
Map<SegmentServerSelector, Cache.NamedKey> cacheKeys = Maps.newLinkedHashMap();
for (SegmentServerSelector segmentServer : segments) {
final Cache.NamedKey segmentCacheKey = CacheUtil.computeSegmentCacheKey(
segmentServer.getServer().getSegment().getId().toString(),
segmentServer.getSegmentDescriptor(),
queryCacheKey
);
cacheKeys.put(segmentServer, segmentCacheKey);
}
return cacheKeys;
}
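/**
 * Bulk-fetches cached values for the given keys when segment-level cache reads are enabled; otherwise returns an
 * empty map.
 */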
private Map<Cache.NamedKey, byte[]> computeCachedValues(Map<SegmentServerSelector, Cache.NamedKey> cacheKeys)
{
if (useCache) {
return cache.getBulk(Iterables.limit(cacheKeys.values(), cacheConfig.getCacheBulkMergeLimit()));
} else {
return ImmutableMap.of();
}
}
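// Cache populator keys are tracked per "segmentId_interval" pair and looked up again via getCachePopulatorKey
// when results need to be written back to the cache.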
private void addCachePopulatorKey(
Cache.NamedKey segmentCacheKey,
SegmentId segmentId,
Interval segmentQueryInterval
)
{
cachePopulatorKeyMap.put(StringUtils.format("%s_%s", segmentId, segmentQueryInterval), segmentCacheKey);
}
@Nullable
private Cache.NamedKey getCachePopulatorKey(String segmentId, Interval segmentInterval)
{
return cachePopulatorKeyMap.get(StringUtils.format("%s_%s", segmentId, segmentInterval));
}
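/**
 * Picks a concrete server for each segment via its server selector and groups segment descriptors by the chosen
 * server; segments for which no server can be picked are skipped (with an alert).
 */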
private SortedMap<DruidServer, List<SegmentDescriptor>> groupSegmentsByServer(Set<SegmentServerSelector> segments)
{
final SortedMap<DruidServer, List<SegmentDescriptor>> serverSegments = new TreeMap<>();
for (SegmentServerSelector segmentServer : segments) {
final QueryableDruidServer queryableDruidServer = segmentServer.getServer().pick(query);
if (queryableDruidServer == null) {
log.makeAlert(
"No servers found for SegmentDescriptor[%s] for DataSource[%s]?! How can this be?!",
segmentServer.getSegmentDescriptor(),
query.getDataSource()
).emit();
} else {
final DruidServer server = queryableDruidServer.getServer();
serverSegments.computeIfAbsent(server, s -> new ArrayList<>()).add(segmentServer.getSegmentDescriptor());
}
}
return serverSegments;
}
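/**
 * Adds sequences reconstructed from the cached per-segment results to the list of sequences to merge; no-op when
 * the query has no cache strategy.
 */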
private void addSequencesFromCache(
final List<Sequence<T>> listOfSequences,
final List<Pair<Interval, byte[]>> cachedResults
)
{
if (strategy == null) {
return;
}
final Function