
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.client;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.common.collect.RangeSet;
import com.google.common.collect.Sets;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import com.google.common.primitives.Bytes;
import com.google.inject.Inject;
import org.apache.druid.client.cache.Cache;
import org.apache.druid.client.cache.CacheConfig;
import org.apache.druid.client.cache.CachePopulator;
import org.apache.druid.client.selector.QueryableDruidServer;
import org.apache.druid.client.selector.ServerSelector;
import org.apache.druid.guice.annotations.Client;
import org.apache.druid.guice.annotations.Merging;
import org.apache.druid.guice.annotations.Smile;
import org.apache.druid.guice.http.DruidHttpClientConfig;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.guava.BaseSequence;
import org.apache.druid.java.util.common.guava.LazySequence;
import org.apache.druid.java.util.common.guava.ParallelMergeCombiningSequence;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.query.BrokerParallelMergeConfig;
import org.apache.druid.query.BySegmentResultValueClass;
import org.apache.druid.query.CacheStrategy;
import org.apache.druid.query.Queries;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryContext;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.QueryMetrics;
import org.apache.druid.query.QueryPlus;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.QuerySegmentWalker;
import org.apache.druid.query.QueryToolChest;
import org.apache.druid.query.QueryToolChestWarehouse;
import org.apache.druid.query.Result;
import org.apache.druid.query.SegmentDescriptor;
import org.apache.druid.query.aggregation.MetricManipulatorFns;
import org.apache.druid.query.context.ResponseContext;
import org.apache.druid.query.filter.DimFilterUtils;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.query.spec.QuerySegmentSpec;
import org.apache.druid.server.QueryResource;
import org.apache.druid.server.QueryScheduler;
import org.apache.druid.server.coordination.DruidServerMetadata;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.Overshadowable;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.timeline.TimelineLookup;
import org.apache.druid.timeline.TimelineObjectHolder;
import org.apache.druid.timeline.VersionedIntervalTimeline;
import org.apache.druid.timeline.VersionedIntervalTimeline.PartitionChunkEntry;
import org.apache.druid.timeline.partition.PartitionChunk;
import org.joda.time.Interval;

import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ForkJoinPool;
import java.util.function.BinaryOperator;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;

/**
 * This is the class on the Broker that is responsible for making native Druid queries to a cluster of data servers.
 *
 * The main user of this class is {@link org.apache.druid.server.ClientQuerySegmentWalker}. In tests, its behavior
 * is partially mimicked by TestClusterQuerySegmentWalker.
 */
public class CachingClusteredClient implements QuerySegmentWalker
{
  private static final EmittingLogger log = new EmittingLogger(CachingClusteredClient.class);
  private final QueryToolChestWarehouse warehouse;
  private final TimelineServerView serverView;
  private final Cache cache;
  private final ObjectMapper objectMapper;
  private final CachePopulator cachePopulator;
  private final CacheConfig cacheConfig;
  private final DruidHttpClientConfig httpClientConfig;
  private final BrokerParallelMergeConfig parallelMergeConfig;
  private final ForkJoinPool pool;
  private final QueryScheduler scheduler;
  private final ServiceEmitter emitter;

  @Inject
  public CachingClusteredClient(
      QueryToolChestWarehouse warehouse,
      TimelineServerView serverView,
      Cache cache,
      @Smile ObjectMapper objectMapper,
      CachePopulator cachePopulator,
      CacheConfig cacheConfig,
      @Client DruidHttpClientConfig httpClientConfig,
      BrokerParallelMergeConfig parallelMergeConfig,
      @Merging ForkJoinPool pool,
      QueryScheduler scheduler,
      ServiceEmitter emitter
  )
  {
    this.warehouse = warehouse;
    this.serverView = serverView;
    this.cache = cache;
    this.objectMapper = objectMapper;
    this.cachePopulator = cachePopulator;
    this.cacheConfig = cacheConfig;
    this.httpClientConfig = httpClientConfig;
    this.parallelMergeConfig = parallelMergeConfig;
    this.pool = pool;
    this.scheduler = scheduler;
    this.emitter = emitter;

    if (cacheConfig.isQueryCacheable(Query.GROUP_BY) && (cacheConfig.isUseCache() || cacheConfig.isPopulateCache())) {
      log.warn(
          "Even though groupBy caching is enabled in your configuration, v2 groupBys will not be cached on the broker. "
          + "Consider enabling caching on your data nodes if it is not already enabled."
      );
    }

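    // When any server drops a segment, close the broker-side cache namespace for that segment so its
    // per-segment cache entries do not linger.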
    serverView.registerSegmentCallback(
        Execs.singleThreaded("CCClient-ServerView-CB-%d"),
        new ServerView.BaseSegmentCallback()
        {
          @Override
          public ServerView.CallbackAction segmentRemoved(DruidServerMetadata server, DataSegment segment)
          {
            CachingClusteredClient.this.cache.close(segment.getId().toString());
            return ServerView.CallbackAction.CONTINUE;
          }
        }
    );
  }

  @Override
  public <T> QueryRunner<T> getQueryRunnerForIntervals(final Query<T> query, final Iterable<Interval> intervals)
  {
    return new QueryRunner<T>()
    {
      @Override
      public Sequence<T> run(final QueryPlus<T> queryPlus, final ResponseContext responseContext)
      {
        return CachingClusteredClient.this.run(queryPlus, responseContext, timeline -> timeline, false);
      }
    };
  }

  /**
   * Run a query. The timelineConverter will be given the "master" timeline and can be used to return a different
   * timeline, if desired. This is used by getQueryRunnerForSegments.
   */
  private <T> Sequence<T> run(
      final QueryPlus<T> queryPlus,
      final ResponseContext responseContext,
      final UnaryOperator<TimelineLookup<String, ServerSelector>> timelineConverter,
      final boolean specificSegments
  )
  {
    final ClusterQueryResult<T> result = new SpecificQueryRunnable<>(queryPlus, responseContext)
        .run(timelineConverter, specificSegments);
    initializeNumRemainingResponsesInResponseContext(queryPlus.getQuery(), responseContext, result.numQueryServers);
    return result.sequence;
  }

  private static <T> void initializeNumRemainingResponsesInResponseContext(
      final Query<T> query,
      final ResponseContext responseContext,
      final int numQueryServers
  )
  {
    responseContext.addRemainingResponse(query.getMostSpecificId(), numQueryServers);
  }

  @Override
  public <T> QueryRunner<T> getQueryRunnerForSegments(final Query<T> query, final Iterable<SegmentDescriptor> specs)
  {
    return new QueryRunner<T>()
    {
      @Override
      public Sequence<T> run(final QueryPlus<T> queryPlus, final ResponseContext responseContext)
      {
        return CachingClusteredClient.this.run(
            queryPlus,
            responseContext,
            new TimelineConverter<>(specs),
            true
        );
      }
    };
  }

  private static class ClusterQueryResult<T>
  {
    private final Sequence<T> sequence;
    private final int numQueryServers;

    private ClusterQueryResult(Sequence<T> sequence, int numQueryServers)
    {
      this.sequence = sequence;
      this.numQueryServers = numQueryServers;
    }
  }

  /**
   * This class essentially encapsulates the major part of the logic of {@link CachingClusteredClient}. Its state and
   * methods cannot belong to {@link CachingClusteredClient} itself, because they depend on the specific query object
   * being run, while the {@link QuerySegmentWalker} API is designed so that implementations should be able to accept
   * arbitrary queries.
   */
  private class SpecificQueryRunnable<T>
  {
    private final ResponseContext responseContext;
    private QueryPlus<T> queryPlus;
    private Query<T> query;
    private final QueryToolChest<T, Query<T>> toolChest;
    @Nullable
    private final CacheStrategy<T, Object, Query<T>> strategy;
    private final boolean useCache;
    private final boolean populateCache;
    private final boolean isBySegment;
    private final int uncoveredIntervalsLimit;
    private final Map<String, Cache.NamedKey> cachePopulatorKeyMap = new HashMap<>();
    private final DataSourceAnalysis dataSourceAnalysis;
    private final List<Interval> intervals;
    private final CacheKeyManager<T> cacheKeyManager;

    SpecificQueryRunnable(final QueryPlus<T> queryPlus, final ResponseContext responseContext)
    {
      this.queryPlus = queryPlus;
      this.responseContext = responseContext;
      this.query = queryPlus.getQuery();
      this.toolChest = warehouse.getToolChest(query);
      this.strategy = toolChest.getCacheStrategy(query, objectMapper);
      this.dataSourceAnalysis = query.getDataSource().getAnalysis();

      this.useCache = CacheUtil.isUseSegmentCache(query, strategy, cacheConfig, CacheUtil.ServerType.BROKER);
      this.populateCache = CacheUtil.isPopulateSegmentCache(query, strategy, cacheConfig, CacheUtil.ServerType.BROKER);
      final QueryContext queryContext = query.context();
      this.isBySegment = queryContext.isBySegment();
      // Note that enabling this leads to putting uncovered intervals information in the response headers
      // and might blow up in some cases https://github.com/apache/druid/issues/2108
      this.uncoveredIntervalsLimit = queryContext.getUncoveredIntervalsLimit();
      // For nested queries, we need to look at the intervals of the innermost query.
      this.intervals = dataSourceAnalysis.getBaseQuerySegmentSpec()
                                         .map(QuerySegmentSpec::getIntervals)
                                         .orElseGet(() -> query.getIntervals());
      this.cacheKeyManager = new CacheKeyManager<>(
          query,
          strategy,
          useCache,
          populateCache
      );
    }

    private ImmutableMap<String, Object> makeDownstreamQueryContext()
    {
      final ImmutableMap.Builder<String, Object> contextBuilder = new ImmutableMap.Builder<>();

      final QueryContext queryContext = query.context();
      final int priority = queryContext.getPriority();
      contextBuilder.put(QueryContexts.PRIORITY_KEY, priority);
      final String lane = queryContext.getLane();
      if (lane != null) {
        contextBuilder.put(QueryContexts.LANE_KEY, lane);
      }

      if (populateCache) {
        // prevent down-stream nodes from caching results as well if we are populating the cache
        contextBuilder.put(CacheConfig.POPULATE_CACHE, false);
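        // also request bySegment results so the broker can populate its per-segment cache entries from them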
        contextBuilder.put(QueryContexts.BY_SEGMENT_KEY, true);
      }
      return contextBuilder.build();
    }

    /**
     * Builds a query distribution and merge plan.
     *
     * This method returns an empty sequence if the query datasource is unknown or there is a matching result-level
     * cache entry.
     * Otherwise, it creates a sequence merging sequences from the regular broker cache and remote servers. If parallel
     * merge is enabled, it can merge and *combine* the underlying sequences in parallel.
     *
     * @return a pair of a sequence merging results from remote query servers and the number of remote servers
     *         participating in query processing.
     */
    ClusterQueryResult<T> run(
        final UnaryOperator<TimelineLookup<String, ServerSelector>> timelineConverter,
        final boolean specificSegments
    )
    {
      final Optional<? extends TimelineLookup<String, ServerSelector>> maybeTimeline = serverView.getTimeline(
          dataSourceAnalysis
      );
      if (!maybeTimeline.isPresent()) {
        return new ClusterQueryResult<>(Sequences.empty(), 0);
      }

      final TimelineLookup<String, ServerSelector> timeline = timelineConverter.apply(maybeTimeline.get());
      if (uncoveredIntervalsLimit > 0) {
        computeUncoveredIntervals(timeline);
      }

      final Set<SegmentServerSelector> segmentServers = computeSegmentsToQuery(timeline, specificSegments);
      @Nullable
      final byte[] queryCacheKey = cacheKeyManager.computeSegmentLevelQueryCacheKey();
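      // If the client sent an If-None-Match header, compute the current ETag for this query and segment set.
      // A match means the caller's result-level cache entry is still valid, so data servers are skipped entirely.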
      @Nullable
      final String prevEtag = (String) query.getContext().get(QueryResource.HEADER_IF_NONE_MATCH);
      if (prevEtag != null) {
        @Nullable
        final String currentEtag = cacheKeyManager.computeResultLevelCachingEtag(segmentServers, queryCacheKey);
        if (null != currentEtag) {
          responseContext.putEntityTag(currentEtag);
        }
        if (currentEtag != null && currentEtag.equals(prevEtag)) {
          return new ClusterQueryResult<>(Sequences.empty(), 0);
        }
      }

      final List<Pair<Interval, byte[]>> alreadyCachedResults =
          pruneSegmentsWithCachedResults(queryCacheKey, segmentServers);

      query = scheduler.prioritizeAndLaneQuery(queryPlus, segmentServers);
      queryPlus = queryPlus.withQuery(query);
      queryPlus = queryPlus.withQueryMetrics(toolChest);
      queryPlus.getQueryMetrics().reportQueriedSegmentCount(segmentServers.size()).emit(emitter);

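      // Group the remaining (non-cached) segments by the server chosen for each, and lazily build the merged
      // sequence: cache reads and remote HTTP connections happen only once the result sequence is consumed.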
      final SortedMap<DruidServer, List<SegmentDescriptor>> segmentsByServer = groupSegmentsByServer(segmentServers);
      LazySequence<T> mergedResultSequence = new LazySequence<>(() -> {
        List<Sequence<T>> sequencesByInterval = new ArrayList<>(alreadyCachedResults.size() + segmentsByServer.size());
        addSequencesFromCache(sequencesByInterval, alreadyCachedResults);
        addSequencesFromServer(sequencesByInterval, segmentsByServer);
        return merge(sequencesByInterval);
      });

      return new ClusterQueryResult<>(scheduler.run(query, mergedResultSequence), segmentsByServer.size());
    }

    private Sequence<T> merge(List<Sequence<T>> sequencesByInterval)
    {
      BinaryOperator<T> mergeFn = toolChest.createMergeFn(query);
      final QueryContext queryContext = query.context();
      if (parallelMergeConfig.useParallelMergePool() && queryContext.getEnableParallelMerges() && mergeFn != null) {
        final ParallelMergeCombiningSequence<T> parallelSequence = new ParallelMergeCombiningSequence<>(
            pool,
            sequencesByInterval,
            query.getResultOrdering(),
            mergeFn,
            queryContext.hasTimeout(),
            queryContext.getTimeout(),
            queryContext.getPriority(),
            queryContext.getParallelMergeParallelism(parallelMergeConfig.getDefaultMaxQueryParallelism()),
            queryContext.getParallelMergeInitialYieldRows(parallelMergeConfig.getInitialYieldNumRows()),
            queryContext.getParallelMergeSmallBatchRows(parallelMergeConfig.getSmallBatchNumRows()),
            parallelMergeConfig.getTargetRunTimeMillis(),
            reportMetrics -> {
              QueryMetrics<?> queryMetrics = queryPlus.getQueryMetrics();
              if (queryMetrics != null) {
                queryMetrics.parallelMergeParallelism(reportMetrics.getParallelism());
                queryMetrics.reportParallelMergeParallelism(reportMetrics.getParallelism()).emit(emitter);
                queryMetrics.reportParallelMergeInputSequences(reportMetrics.getInputSequences()).emit(emitter);
                queryMetrics.reportParallelMergeInputRows(reportMetrics.getInputRows()).emit(emitter);
                queryMetrics.reportParallelMergeOutputRows(reportMetrics.getOutputRows()).emit(emitter);
                queryMetrics.reportParallelMergeTaskCount(reportMetrics.getTaskCount()).emit(emitter);
                queryMetrics.reportParallelMergeTotalCpuTime(reportMetrics.getTotalCpuTime()).emit(emitter);
                queryMetrics.reportParallelMergeTotalTime(reportMetrics.getTotalTime()).emit(emitter);
                queryMetrics.reportParallelMergeSlowestPartitionTime(reportMetrics.getSlowestPartitionInitializedTime())
                            .emit(emitter);
                queryMetrics.reportParallelMergeFastestPartitionTime(reportMetrics.getFastestPartitionInitializedTime())
                            .emit(emitter);
              }
            }
        );
        scheduler.registerQueryFuture(query, parallelSequence.getCancellationFuture());
        return parallelSequence;
      } else {
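        // Parallel merge is unavailable (disabled by config or context, or no merge function for this query
        // type), so fall back to an ordered, single-threaded flat merge of the per-server and cache sequences.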
        return Sequences
            .simple(sequencesByInterval)
            .flatMerge(seq -> seq, query.getResultOrdering());
      }
    }

    private Set<SegmentServerSelector> computeSegmentsToQuery(
        TimelineLookup<String, ServerSelector> timeline,
        boolean specificSegments
    )
    {
      final java.util.function.Function<Interval, List<TimelineObjectHolder<String, ServerSelector>>> lookupFn
          = specificSegments ? timeline::lookupWithIncompletePartitions : timeline::lookup;

      List<TimelineObjectHolder<String, ServerSelector>> timelineObjectHolders =
          intervals.stream().flatMap(i -> lookupFn.apply(i).stream()).collect(Collectors.toList());
      final List<TimelineObjectHolder<String, ServerSelector>> serversLookup = toolChest.filterSegments(
          query,
          timelineObjectHolders
      );

      final Set<SegmentServerSelector> segments = new LinkedHashSet<>();
      final Map<String, Optional<RangeSet<String>>> dimensionRangeCache;
      final Set<String> filterFieldsForPruning;

      final boolean trySecondaryPartitionPruning =
          query.getFilter() != null && query.context().isSecondaryPartitionPruningEnabled();

      if (trySecondaryPartitionPruning) {
        dimensionRangeCache = new HashMap<>();
        filterFieldsForPruning =
            DimFilterUtils.onlyBaseFields(query.getFilter().getRequiredColumns(), dataSourceAnalysis);
      } else {
        dimensionRangeCache = null;
        filterFieldsForPruning = null;
      }

      // Filter unneeded chunks based on partition dimension
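      // (when secondary partition pruning applies, DimFilterUtils.filterShards uses each chunk's shard spec to
      // drop chunks whose value domains cannot match the filter; otherwise every chunk of the holder is kept)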
      for (TimelineObjectHolder<String, ServerSelector> holder : serversLookup) {
        final Set<PartitionChunk<ServerSelector>> filteredChunks;
        if (trySecondaryPartitionPruning) {
          filteredChunks = DimFilterUtils.filterShards(
              query.getFilter(),
              filterFieldsForPruning,
              holder.getObject(),
              partitionChunk -> partitionChunk.getObject().getSegment().getShardSpec(),
              dimensionRangeCache
          );
        } else {
          filteredChunks = Sets.newLinkedHashSet(holder.getObject());
        }
        for (PartitionChunk<ServerSelector> chunk : filteredChunks) {
          ServerSelector server = chunk.getObject();
          final SegmentDescriptor segment = new SegmentDescriptor(
              holder.getInterval(),
              holder.getVersion(),
              chunk.getChunkNumber()
          );
          segments.add(new SegmentServerSelector(server, segment));
        }
      }
      return segments;
    }

    private void computeUncoveredIntervals(TimelineLookup<String, ServerSelector> timeline)
    {
      final List<Interval> uncoveredIntervals = new ArrayList<>(uncoveredIntervalsLimit);
      boolean uncoveredIntervalsOverflowed = false;

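      // Walk each requested interval against the timeline: any gap before the next covered chunk, or after the
      // last chunk, is recorded as uncovered, up to uncoveredIntervalsLimit intervals.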
      for (Interval interval : intervals) {
        Iterable<TimelineObjectHolder<String, ServerSelector>> lookup = timeline.lookup(interval);
        long startMillis = interval.getStartMillis();
        long endMillis = interval.getEndMillis();
        for (TimelineObjectHolder<String, ServerSelector> holder : lookup) {
          Interval holderInterval = holder.getInterval();
          long intervalStart = holderInterval.getStartMillis();
          if (!uncoveredIntervalsOverflowed && startMillis != intervalStart) {
            if (uncoveredIntervalsLimit > uncoveredIntervals.size()) {
              uncoveredIntervals.add(Intervals.utc(startMillis, intervalStart));
            } else {
              uncoveredIntervalsOverflowed = true;
            }
          }
          startMillis = holderInterval.getEndMillis();
        }

        if (!uncoveredIntervalsOverflowed && startMillis < endMillis) {
          if (uncoveredIntervalsLimit > uncoveredIntervals.size()) {
            uncoveredIntervals.add(Intervals.utc(startMillis, endMillis));
          } else {
            uncoveredIntervalsOverflowed = true;
          }
        }
      }

      if (!uncoveredIntervals.isEmpty()) {
        // Record in the response context the intervals for which NO segment is present.
        // This is not necessarily an indication that the data doesn't exist or is
        // incomplete; the data could exist and just not be loaded yet. In either
        // case, though, this query will not include any data from the identified intervals.
        responseContext.putUncoveredIntervals(uncoveredIntervals, uncoveredIntervalsOverflowed);
      }
    }

    private List<Pair<Interval, byte[]>> pruneSegmentsWithCachedResults(
        final byte[] queryCacheKey,
        final Set<SegmentServerSelector> segments
    )
    {
      if (queryCacheKey == null) {
        return Collections.emptyList();
      }
      final List<Pair<Interval, byte[]>> alreadyCachedResults = new ArrayList<>();
      Map<SegmentServerSelector, Cache.NamedKey> perSegmentCacheKeys = computePerSegmentCacheKeys(
          segments,
          queryCacheKey
      );
      // Pull cached segments from cache and remove from set of segments to query
      final Map<Cache.NamedKey, byte[]> cachedValues = computeCachedValues(perSegmentCacheKeys);

      perSegmentCacheKeys.forEach((segment, segmentCacheKey) -> {
        final Interval segmentQueryInterval = segment.getSegmentDescriptor().getInterval();

        final byte[] cachedValue = cachedValues.get(segmentCacheKey);
        if (cachedValue != null) {
          // remove cached segment from set of segments to query
          segments.remove(segment);
          alreadyCachedResults.add(Pair.of(segmentQueryInterval, cachedValue));
        } else if (populateCache) {
          // otherwise, if populating cache, add segment to list of segments to cache
          final SegmentId segmentId = segment.getServer().getSegment().getId();
          addCachePopulatorKey(segmentCacheKey, segmentId, segmentQueryInterval);
        }
      });
      return alreadyCachedResults;
    }

    private Map<SegmentServerSelector, Cache.NamedKey> computePerSegmentCacheKeys(
        Set<SegmentServerSelector> segments,
        byte[] queryCacheKey
    )
    {
      // cacheKeys map must preserve segment ordering, in order for shards to always be combined in the same order
      Map<SegmentServerSelector, Cache.NamedKey> cacheKeys = Maps.newLinkedHashMap();
      for (SegmentServerSelector segmentServer : segments) {
        final Cache.NamedKey segmentCacheKey = CacheUtil.computeSegmentCacheKey(
            segmentServer.getServer().getSegment().getId().toString(),
            segmentServer.getSegmentDescriptor(),
            queryCacheKey
        );
        cacheKeys.put(segmentServer, segmentCacheKey);
      }
      return cacheKeys;
    }

    private Map<Cache.NamedKey, byte[]> computeCachedValues(Map<SegmentServerSelector, Cache.NamedKey> cacheKeys)
    {
      if (useCache) {
        return cache.getBulk(Iterables.limit(cacheKeys.values(), cacheConfig.getCacheBulkMergeLimit()));
      } else {
        return ImmutableMap.of();
      }
    }

    private void addCachePopulatorKey(
        Cache.NamedKey segmentCacheKey,
        SegmentId segmentId,
        Interval segmentQueryInterval
    )
    {
      cachePopulatorKeyMap.put(StringUtils.format("%s_%s", segmentId, segmentQueryInterval), segmentCacheKey);
    }

    @Nullable
    private Cache.NamedKey getCachePopulatorKey(String segmentId, Interval segmentInterval)
    {
      return cachePopulatorKeyMap.get(StringUtils.format("%s_%s", segmentId, segmentInterval));
    }

    private SortedMap<DruidServer, List<SegmentDescriptor>> groupSegmentsByServer(Set<SegmentServerSelector> segments)
    {
      final SortedMap<DruidServer, List<SegmentDescriptor>> serverSegments = new TreeMap<>();
      for (SegmentServerSelector segmentServer : segments) {
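        // Pick a concrete server for this segment according to the broker's server-selection strategy;
        // null means no server is currently serving the segment.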
        final QueryableDruidServer queryableDruidServer = segmentServer.getServer().pick(query);

        if (queryableDruidServer == null) {
          log.makeAlert(
              "No servers found for SegmentDescriptor[%s] for DataSource[%s]?! How can this be?!",
              segmentServer.getSegmentDescriptor(),
              query.getDataSource()
          ).emit();
        } else {
          final DruidServer server = queryableDruidServer.getServer();
          serverSegments.computeIfAbsent(server, s -> new ArrayList<>()).add(segmentServer.getSegmentDescriptor());
        }
      }
      return serverSegments;
    }

    private void addSequencesFromCache(
        final List<Sequence<T>> listOfSequences,
        final List<Pair<Interval, byte[]>> cachedResults
    )
    {
      if (strategy == null) {
        return;
      }

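      // Cached values are stored in the cache strategy's intermediate representation; deserialize them lazily
      // and map each entry back to the query's result type via pullFromSegmentLevelCache().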
      final Function<Object, T> pullFromCacheFunction = strategy.pullFromSegmentLevelCache();
      final TypeReference<Object> cacheObjectClazz = strategy.getCacheObjectClazz();
      for (Pair<Interval, byte[]> cachedResultPair : cachedResults) {
        final byte[] cachedResult = cachedResultPair.rhs;
        Sequence<Object> cachedSequence = new BaseSequence<>(
            new BaseSequence.IteratorMaker<Object, Iterator<Object>>()
            {
              @Override
              public Iterator<Object> make()
              {
                try {
                  if (cachedResult.length == 0) {
                    return Collections.emptyIterator();
                  }

                  return objectMapper.readValues(
                      objectMapper.getFactory().createParser(cachedResult),
                      cacheObjectClazz
                  );
                }
                catch (IOException e) {
                  throw new RuntimeException(e);
                }
              }

              @Override
              public void cleanup(Iterator<Object> iterFromMake)
              {
              }
            }
        );
        listOfSequences.add(Sequences.map(cachedSequence, pullFromCacheFunction));
      }
    }

    /**
     * Creates sequences that read from remote query servers (historicals and tasks). Note that the broker will
     * hold an HTTP connection per server after this method is called.
     */
    private void addSequencesFromServer(
        final List<Sequence<T>> listOfSequences,
        final SortedMap<DruidServer, List<SegmentDescriptor>> segmentsByServer
    )
    {
      segmentsByServer.forEach((server, segmentsOfServer) -> {
        final QueryRunner serverRunner = serverView.getQueryRunner(server);

        if (serverRunner == null) {
          log.error("Server [%s] doesn't have a query runner", server.getName());
          return;
        }

        // Divide user-provided maxQueuedBytes by the number of servers, and limit each server to that much.
        final long maxQueuedBytes = query.context().getMaxQueuedBytes(httpClientConfig.getMaxQueuedBytes());
        final long maxQueuedBytesPerServer = maxQueuedBytes / segmentsByServer.size();
        final Sequence<T> serverResults;

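        // Three cases: bySegment queries pass results through as-is; servers that are not segment replication
        // targets (e.g. realtime tasks), or queries not populating the cache, take the simple path; otherwise
        // results are also written to the segment-level cache as they stream back.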
        if (isBySegment) {
          serverResults = getBySegmentServerResults(serverRunner, segmentsOfServer, maxQueuedBytesPerServer);
        } else if (!server.isSegmentReplicationTarget() || !populateCache) {
          serverResults = getSimpleServerResults(serverRunner, segmentsOfServer, maxQueuedBytesPerServer);
        } else {
          serverResults = getAndCacheServerResults(serverRunner, segmentsOfServer, maxQueuedBytesPerServer);
        }
        listOfSequences.add(serverResults);
      });
    }

    @SuppressWarnings("unchecked")
    private Sequence<T> getBySegmentServerResults(
        final QueryRunner serverRunner,
        final List<SegmentDescriptor> segmentsOfServer,
        long maxQueuedBytesPerServer
    )
    {
      Sequence<Result<BySegmentResultValueClass<T>>> resultsBySegments = serverRunner
          .run(
              queryPlus.withQuery(
                  Queries.withSpecificSegments(queryPlus.getQuery(), segmentsOfServer)
              ).withMaxQueuedBytes(maxQueuedBytesPerServer),
              responseContext
          );
      // bySegment results need to be de-serialized, see DirectDruidClient.run()
      return (Sequence<T>) resultsBySegments
          .map(result -> result.map(
              resultsOfSegment -> resultsOfSegment.mapResults(
                  toolChest.makePreComputeManipulatorFn(query, MetricManipulatorFns.deserializing())::apply
              )
          ));
    }

    @SuppressWarnings("unchecked")
    private Sequence<T> getSimpleServerResults(
        final QueryRunner serverRunner,
        final List<SegmentDescriptor> segmentsOfServer,
        long maxQueuedBytesPerServer
    )
    {
      return serverRunner.run(
          queryPlus.withQuery(
              Queries.withSpecificSegments(queryPlus.getQuery(), segmentsOfServer)
          ).withMaxQueuedBytes(maxQueuedBytesPerServer),
          responseContext
      );
    }

    private Sequence<T> getAndCacheServerResults(
        final QueryRunner serverRunner,
        final List<SegmentDescriptor> segmentsOfServer,
        long maxQueuedBytesPerServer
    )
    {
      @SuppressWarnings("unchecked")
      final Query<T> downstreamQuery = query.withOverriddenContext(makeDownstreamQueryContext());
      final Sequence<Result<BySegmentResultValueClass<T>>> resultsBySegments = serverRunner.run(
          queryPlus
              .withQuery(
                  Queries.withSpecificSegments(
                      downstreamQuery,
                      segmentsOfServer
                  )
              )
              .withMaxQueuedBytes(maxQueuedBytesPerServer),
          responseContext
      );
      final Function<T, Object> cacheFn = strategy.prepareForSegmentLevelCache();

      return resultsBySegments
          .map(result -> {
            final BySegmentResultValueClass<T> resultsOfSegment = result.getValue();
            final Cache.NamedKey cachePopulatorKey =
                getCachePopulatorKey(resultsOfSegment.getSegmentId(), resultsOfSegment.getInterval());
            Sequence<T> res = Sequences.simple(resultsOfSegment.getResults());
            if (cachePopulatorKey != null) {
              res = cachePopulator.wrap(res, cacheFn::apply, cache, cachePopulatorKey);
            }
            return res.map(
                toolChest.makePreComputeManipulatorFn(downstreamQuery, MetricManipulatorFns.deserializing())::apply
            );
          })
          .flatMerge(seq -> seq, query.getResultOrdering());
    }
  }

  /**
   * An inner class that is used solely for computing cache keys. It's a separate class to allow extensive unit testing
   * of cache key generation.
   */
  @VisibleForTesting
  static class CacheKeyManager<T>
  {
    private final Query<T> query;
    private final CacheStrategy<T, Object, Query<T>> strategy;
    private final boolean isSegmentLevelCachingEnable;

    CacheKeyManager(
        final Query<T> query,
        final CacheStrategy<T, Object, Query<T>> strategy,
        final boolean useCache,
        final boolean populateCache
    )
    {

      this.query = query;
      this.strategy = strategy;
      this.isSegmentLevelCachingEnable = ((populateCache || useCache)
                                          && !query.context().isBySegment());   // explicit bySegment queries are never cached

    }

    @Nullable
    byte[] computeSegmentLevelQueryCacheKey()
    {
      if (isSegmentLevelCachingEnable) {
        return computeQueryCacheKeyWithJoin();
      }
      return null;
    }

    /**
     * Computes the ETag used by {@link org.apache.druid.query.ResultLevelCachingQueryRunner} for the
     * result-level cache. queryCacheKey can be null if the segment-level cache is not being used; the ETag
     * is still computed in that case since the result-level cache may still be enabled.
     */
    @Nullable
    String computeResultLevelCachingEtag(
        final Set<SegmentServerSelector> segments,
        @Nullable byte[] queryCacheKey
    )
    {
      Hasher hasher = Hashing.sha1().newHasher();
      boolean hasOnlyHistoricalSegments = true;
      for (SegmentServerSelector p : segments) {
        QueryableDruidServer queryableServer = p.getServer().pick(query);
        if (queryableServer == null || !queryableServer.getServer().isSegmentReplicationTarget()) {
          hasOnlyHistoricalSegments = false;
          break;
        }
        hasher.putString(p.getServer().getSegment().getId().toString(), StandardCharsets.UTF_8);
        // it is important to add the "query interval" as part of the ETag calculation
        // to have result level cache work correctly for queries with different
        // intervals covering the same set of segments
        hasher.putString(p.rhs.getInterval().toString(), StandardCharsets.UTF_8);
      }

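      // Results that involve non-historical servers (such as realtime tasks) may still change, so no stable
      // ETag is produced and result-level caching is effectively skipped for this query.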
      if (!hasOnlyHistoricalSegments) {
        return null;
      }

      // query cache key can be null if segment level caching is disabled
      final byte[] queryCacheKeyFinal = (queryCacheKey == null) ? computeQueryCacheKeyWithJoin() : queryCacheKey;
      if (queryCacheKeyFinal == null) {
        return null;
      }
      hasher.putBytes(queryCacheKeyFinal);
      String currEtag = StringUtils.encodeBase64String(hasher.hash().asBytes());
      return currEtag;
    }

    /**
     * Adds the cache key prefix for join data sources. Returns null if it is a join but caching is not supported.
     */
    @Nullable
    private byte[] computeQueryCacheKeyWithJoin()
    {
      Preconditions.checkNotNull(strategy, "strategy cannot be null");
      byte[] dataSourceCacheKey = query.getDataSource().getCacheKey();
      if (null == dataSourceCacheKey) {
        return null;
      } else if (dataSourceCacheKey.length > 0) {
        return Bytes.concat(dataSourceCacheKey, strategy.computeCacheKey(query));
      } else {
        return strategy.computeCacheKey(query);
      }
    }
  }

  /**
   * Helper class to build a new timeline of filtered segments.
   */
  public static class TimelineConverter<T extends Overshadowable<T>> implements UnaryOperator<TimelineLookup<String, T>>
  {
    private final Iterable<SegmentDescriptor> specs;

    public TimelineConverter(final Iterable<SegmentDescriptor> specs)
    {
      this.specs = specs;
    }

    @Override
    public TimelineLookup<String, T> apply(TimelineLookup<String, T> timeline)
    {
      Iterator<PartitionChunkEntry<String, T>> unfilteredIterator =
          Iterators.transform(specs.iterator(), spec -> toChunkEntry(timeline, spec));
      Iterator<PartitionChunkEntry<String, T>> iterator = Iterators.filter(
          unfilteredIterator,
          Objects::nonNull
      );
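      // Specs that cannot be found in the timeline yield null entries and are silently dropped here.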
      final VersionedIntervalTimeline<String, T> newTimeline =
          new VersionedIntervalTimeline<>(Ordering.natural(), true);
      // The VersionedIntervalTimeline#addAll implementation is much more efficient than calling VersionedIntervalTimeline#add
      // in a loop when there are a lot of segments to be added for the same interval and version.
      newTimeline.addAll(iterator);
      return newTimeline;
    }

    @Nullable
    private PartitionChunkEntry<String, T> toChunkEntry(
        TimelineLookup<String, T> timeline,
        SegmentDescriptor spec
    )
    {
      PartitionChunk<T> chunk = timeline.findChunk(
          spec.getInterval(),
          spec.getVersion(),
          spec.getPartitionNumber()
      );
      if (null == chunk) {
        return null;
      }
      return new PartitionChunkEntry<>(spec.getInterval(), spec.getVersion(), chunk);
    }
  }
}