/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.server;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.SetMultimap;
import com.google.common.util.concurrent.ListenableFuture;
import io.github.resilience4j.bulkhead.Bulkhead;
import io.github.resilience4j.bulkhead.BulkheadConfig;
import io.github.resilience4j.bulkhead.BulkheadRegistry;
import it.unimi.dsi.fastutil.objects.Object2IntMap;
import org.apache.druid.client.SegmentServerSelector;
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.guava.LazySequence;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.SequenceWrapper;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.emitter.core.NoopEmitter;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryCapacityExceededException;
import org.apache.druid.query.QueryPlus;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.QueryWatcher;
import org.apache.druid.server.initialization.ServerConfig;
import java.time.Duration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
/**
* QueryScheduler (potentially) assigns any {@link Query} that is to be executed to a 'query lane' using the
* {@link QueryLaningStrategy} that is defined in {@link QuerySchedulerConfig}.
*
* As a {@link QueryWatcher}, it also provides cancellation facilities to brokers, historicals, and realtime tasks.
*
* This class is shared by all requests on the HTTP threadpool and must be thread safe.
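*
* A minimal usage sketch (illustrative only; {@code scheduler}, {@code baseRunner}, {@code queryPlus},
* and {@code responseContext} are hypothetical names, not members of this class):
* <pre>{@code
* QueryRunner<T> scheduled = scheduler.wrapQueryRunner(baseRunner);
* Sequence<T> results = scheduled.run(queryPlus, responseContext);
* }</pre>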
*/
public class QueryScheduler implements QueryWatcher
{
private static final Logger LOGGER = new Logger(QueryScheduler.class);
public static final int UNAVAILABLE = -1;
public static final String TOTAL = "total";
private final int totalCapacity;
private final QueryPrioritizationStrategy prioritizationStrategy;
private final QueryLaningStrategy laningStrategy;
private final BulkheadRegistry laneRegistry;
/**
* Mapping of query id to the set of futures associated with the query.
* This map is synchronized because two threads, the query execution thread and the query canceling
* thread, can access it at the same time.
*
* The updates (additions and removals) on this and {@link #queryDatasources} are racy,
* as those updates are not done atomically across the 2 maps,
* but this is OK in most cases since both are cleaned up once the query is done.
*/
private final SetMultimap<String, ListenableFuture<?>> queryFutures;
/**
* Mapping of query id to the set of datasource names being queried, used for authorization.
* This map is synchronized because two threads, the query execution thread and the query canceling
* thread, can access it at the same time.
*
* The updates (additions and removals) on this and {@link #queryFutures} are racy,
* as those updates are not done atomically across the 2 maps,
* but this is OK in most cases since both are cleaned up once the query is done.
*/
private final SetMultimap<String, String> queryDatasources;
private final ServiceEmitter emitter;
public QueryScheduler(
int totalNumThreads,
QueryPrioritizationStrategy prioritizationStrategy,
QueryLaningStrategy laningStrategy,
ServerConfig serverConfig,
ServiceEmitter emitter
)
{
this.prioritizationStrategy = prioritizationStrategy;
this.laningStrategy = laningStrategy;
this.queryFutures = Multimaps.synchronizedSetMultimap(HashMultimap.create());
this.queryDatasources = Multimaps.synchronizedSetMultimap(HashMultimap.create());
// if totalNumThreads is above 0 and less than druid.server.http.numThreads and
// requests are not being queued by Jetty, enforce total limit
final boolean limitTotal;
if (totalNumThreads > 0
&& totalNumThreads < serverConfig.getNumThreads()
&& !serverConfig.isEnableQueryRequestsQueuing()) {
limitTotal = true;
this.totalCapacity = totalNumThreads;
} else {
limitTotal = false;
this.totalCapacity = serverConfig.getNumThreads();
}
this.laneRegistry = BulkheadRegistry.of(getLaneConfigs(limitTotal));
this.emitter = emitter;
}
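/*
* Worked example of the capacity logic above (hypothetical numbers, not from the source): with
* serverConfig.getNumThreads() == 56, requests not queued by Jetty, and totalNumThreads == 40, the
* total limit is enforced and totalCapacity == 40. With totalNumThreads <= 0 (unset), no 'total'
* bulkhead is created and totalCapacity falls back to 56 for sizing the lanes. For example:
*
* QueryScheduler scheduler = new QueryScheduler(40, prioritizationStrategy, laningStrategy, serverConfig, emitter);
* // scheduler.getTotalAvailableCapacity() == 40 while no queries hold permits
*/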
/**
* Keeping the old constructor since many test classes depend on it
*/
@VisibleForTesting
public QueryScheduler(
int totalNumThreads,
QueryPrioritizationStrategy prioritizationStrategy,
QueryLaningStrategy laningStrategy,
ServerConfig serverConfig
)
{
this(totalNumThreads, prioritizationStrategy, laningStrategy, serverConfig, new ServiceEmitter("test", "localhost", new NoopEmitter()));
}
@Override
public void registerQueryFuture(Query<?> query, ListenableFuture<?> future)
{
final String id = query.getId();
final Set<String> datasources = query.getDataSource().getTableNames();
queryFutures.put(id, future);
queryDatasources.putAll(id, datasources);
future.addListener(
() -> {
queryFutures.remove(id, future);
for (String datasource : datasources) {
queryDatasources.remove(id, datasource);
}
},
Execs.directExecutor()
);
}
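/*
* Lifecycle sketch (illustrative; 'scheduler' and 'future' are hypothetical names): the listener
* added above makes cleanup automatic, whether the future succeeds, fails, or is cancelled:
*
* scheduler.registerQueryFuture(query, future);
* future.cancel(true);
* // the direct-executor listener has already removed the queryFutures and queryDatasources entries
*/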
/**
* Assign a query a priority and lane (if not set)
*/
public <T> Query<T> prioritizeAndLaneQuery(QueryPlus<T> queryPlus, Set<SegmentServerSelector> segments)
{
Query<T> query = queryPlus.getQuery();
Optional<Integer> priority = prioritizationStrategy.computePriority(queryPlus, segments);
query = priority.map(query::withPriority).orElse(query);
Optional<String> lane = laningStrategy.computeLane(queryPlus.withQuery(query), segments);
LOGGER.debug(
"[%s] lane assigned to [%s] query with [%,d] priority",
lane.orElse("default"),
query.getType(),
priority.orElse(0)
);
final ServiceMetricEvent.Builder builderUsr = ServiceMetricEvent.builder().setFeed("metrics")
.setDimension("lane", lane.orElse("default"))
.setDimension("dataSource", query.getDataSource().getTableNames())
.setDimension("type", query.getType());
emitter.emit(builderUsr.setMetric("query/priority", priority.orElse(Integer.valueOf(0))));
return lane.map(query::withLane).orElse(query);
}
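/*
* Illustrative call (hypothetical names; assumes a broker-side caller that has already resolved the
* segments the query will touch):
*
* Query<T> adjusted = scheduler.prioritizeAndLaneQuery(QueryPlus.wrap(query), segments);
* // adjusted carries the computed priority and lane in its context, if the strategies set them
*/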
/**
* Run a query with the scheduler, attempting to acquire a semaphore from the total and lane specific query capacities
*
* Note that {@link #cancelQuery} should not interrupt the thread that calls run, in all current usages it only
* cancels any {@link ListenableFuture} created downstream. If this ever commonly changes, we should add
* synchronization between {@link #cancelQuery} and the acquisition of the {@link Bulkhead} to continue to ensure that
* anything acquired is also released.
*
* In the meantime, if a {@link ListenableFuture} is registered for the query that calls this method, it MUST handle
* this synchronization itself to ensure that no {@link Bulkhead} is acquired without releasing it.
*/
public <T> Sequence<T> run(Query<?> query, Sequence<T> resultSequence)
{
return Sequences.wrap(resultSequence, new SequenceWrapper()
{
private List<Bulkhead> bulkheads = null;
@Override
public void before()
{
bulkheads = acquireLanes(query);
}
@Override
public void after(boolean isDone, Throwable thrown)
{
if (bulkheads != null) {
finishLanes(bulkheads);
}
}
});
}
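/*
* Note that the wrapper above is lazy (illustrative; 'scheduler' is a hypothetical name): permits
* are acquired in before() only once the returned sequence is consumed, and released in after():
*
* Sequence<T> scheduled = scheduler.run(query, resultSequence);
* List<T> rows = scheduled.toList(); // acquires lanes, yields results, then releases the permits
*/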
/**
* Returns a {@link QueryRunner} that will call {@link QueryScheduler#run} when {@link QueryRunner#run} is called.
*/
public <T> QueryRunner<T> wrapQueryRunner(QueryRunner<T> baseRunner)
{
return (queryPlus, responseContext) ->
QueryScheduler.this.run(
queryPlus.getQuery(), new LazySequence<>(() -> baseRunner.run(queryPlus, responseContext))
);
}
/**
* Forcibly cancel all futures that have been registered to a specific query id
*/
public boolean cancelQuery(String id)
{
// if multiple independent queries from the same or different users share a query id, all will be cancelled due
// to the collision
queryDatasources.removeAll(id);
Set<ListenableFuture<?>> futures = queryFutures.removeAll(id);
boolean success = true;
for (ListenableFuture<?> future : futures) {
success = success && future.cancel(true);
}
return success;
}
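/*
* Cancellation sketch (hypothetical id): interrupts every future registered under the id and drops
* the datasource mapping used for authorization; returns false if any future refused cancellation:
*
* boolean allCancelled = scheduler.cancelQuery("q-1234");
*/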
/**
* Get a {@link Set} of datasource names for a {@link Query} id, used by {@link QueryResource#cancelQuery} to
* authorize that a user may call {@link #cancelQuery} for the given id and datasources
*/
public Set<String> getQueryDatasources(final String queryId)
{
return queryDatasources.get(queryId);
}
/**
* Get the maximum number of concurrent queries that {@link #run} can support
*/
@VisibleForTesting
int getTotalAvailableCapacity()
{
return laneRegistry.getConfiguration(TOTAL)
.map(config -> laneRegistry.bulkhead(TOTAL, config).getMetrics().getAvailableConcurrentCalls())
.orElse(UNAVAILABLE);
}
/**
* Get the maximum number of concurrent queries that {@link #run} can support for a given lane
*/
@VisibleForTesting
int getLaneAvailableCapacity(String lane)
{
return laneRegistry.getConfiguration(lane)
.map(config -> laneRegistry.bulkhead(lane, config).getMetrics().getAvailableConcurrentCalls())
.orElse(UNAVAILABLE);
}
/**
* Acquire a semaphore for both the 'total' and a lane, if any is associated with a query
*/
@VisibleForTesting
List<Bulkhead> acquireLanes(Query<?> query)
{
final String lane = query.context().getLane();
final Optional<BulkheadConfig> laneConfig = lane == null ? Optional.empty() : laneRegistry.getConfiguration(lane);
final Optional<BulkheadConfig> totalConfig = laneRegistry.getConfiguration(TOTAL);
List<Bulkhead> hallPasses = new ArrayList<>(2);
try {
// if we have a lane, get it first
laneConfig.ifPresent(config -> {
Bulkhead laneLimiter = laneRegistry.bulkhead(lane, config);
if (!laneLimiter.tryAcquirePermission()) {
throw new QueryCapacityExceededException(lane, config.getMaxConcurrentCalls());
}
hallPasses.add(laneLimiter);
});
// everyone needs to take one from the total lane; to ensure we don't acquire a lane and never release it, we want
// to check for total capacity exceeded and release the lane (if present) before throwing capacity exceeded
// note that this isn't strictly fair: the bulkhead doesn't use a fair semaphore, the first to acquire the lane
// might lose to one that came after it when acquiring the total, or an unlaned query might lose to a laned query
totalConfig.ifPresent(config -> {
Bulkhead totalLimiter = laneRegistry.bulkhead(TOTAL, config);
if (!totalLimiter.tryAcquirePermission()) {
throw new QueryCapacityExceededException(config.getMaxConcurrentCalls());
}
hallPasses.add(totalLimiter);
});
return hallPasses;
}
catch (Exception ex) {
releaseLanes(hallPasses);
throw ex;
}
}
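/*
* Acquisition sketch, as a test might exercise it (illustrative; assumes a query whose context
* carries a configured lane): the lane permit is taken before the 'total' permit, and the catch
* block above guarantees that a failed total acquisition releases the lane permit first:
*
* List<Bulkhead> permits = scheduler.acquireLanes(query); // throws QueryCapacityExceededException when full
* // ... run the query ...
* scheduler.finishLanes(permits);
*/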
/**
* Release all {@link Bulkhead} semaphores in the list
*/
@VisibleForTesting
void releaseLanes(List<Bulkhead> bulkheads)
{
bulkheads.forEach(Bulkhead::releasePermission);
}
@VisibleForTesting
void finishLanes(List<Bulkhead> bulkheads)
{
bulkheads.forEach(Bulkhead::onComplete);
}
/**
* With a total thread count and {@link QueryLaningStrategy#getLaneLimits}, create a map of lane name to
* {@link BulkheadConfig} to be used to create the {@link #laneRegistry}. This accepts the configured value of
* numThreads rather than using {@link #totalCapacity} so that we only have a total {@link Bulkhead} if
* {@link QuerySchedulerConfig#getNumThreads()} is set
*/
private Map<String, BulkheadConfig> getLaneConfigs(boolean hasTotalLimit)
{
Map<String, BulkheadConfig> configs = new HashMap<>();
if (hasTotalLimit) {
configs.put(
TOTAL,
BulkheadConfig.custom().maxConcurrentCalls(totalCapacity).maxWaitDuration(Duration.ZERO).build()
);
}
for (Object2IntMap.Entry<String> entry : laningStrategy.getLaneLimits(totalCapacity).object2IntEntrySet()) {
configs.put(
entry.getKey(),
BulkheadConfig.custom().maxConcurrentCalls(entry.getIntValue()).maxWaitDuration(Duration.ZERO).build()
);
}
return configs;
}
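/*
* Shape of the returned map (hypothetical limits): with hasTotalLimit == true, totalCapacity == 8,
* and a laning strategy that grants a lane named "low" 2 slots, this yields
* {"total" -> maxConcurrentCalls 8, "low" -> maxConcurrentCalls 2}, each with a zero max wait so
* acquisition fails fast instead of blocking.
*/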
}