// org.apache.flink.runtime.scheduler.SlotSharingExecutionSlotAllocator Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.scheduler;
import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
import org.apache.flink.runtime.clusterframework.types.SlotProfile;
import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.runtime.executiongraph.ExecutionVertex;
import org.apache.flink.runtime.jobmaster.LogicalSlot;
import org.apache.flink.runtime.jobmaster.SlotRequestId;
import org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlot;
import org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotProvider;
import org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotRequest;
import org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotRequestBulkChecker;
import org.apache.flink.runtime.scheduler.SharedSlotProfileRetriever.SharedSlotProfileRetrieverFactory;
import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
import org.apache.flink.util.FlinkException;
import org.apache.flink.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.time.Duration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.function.Function;
import java.util.stream.Collectors;
import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;
/**
* Allocates {@link LogicalSlot}s from physical shared slots.
*
* The allocator maintains a shared slot for each {@link ExecutionSlotSharingGroup}. It allocates
* a physical slot for the shared slot and then allocates logical slots from it for scheduled tasks.
* The physical slot is lazily allocated for a shared slot, upon any hosted subtask asking for the
* shared slot. Each subsequent sharing subtask allocates a logical slot from the existing shared
* slot. The shared/physical slot can be released only if all the requested logical slots are
* released or canceled.
*/
class SlotSharingExecutionSlotAllocator implements ExecutionSlotAllocator {
private static final Logger LOG =
LoggerFactory.getLogger(SlotSharingExecutionSlotAllocator.class);
private final PhysicalSlotProvider slotProvider;
private final boolean slotWillBeOccupiedIndefinitely;
private final SlotSharingStrategy slotSharingStrategy;
private final Map sharedSlots;
private final SharedSlotProfileRetrieverFactory sharedSlotProfileRetrieverFactory;
private final PhysicalSlotRequestBulkChecker bulkChecker;
private final Duration allocationTimeout;
private final Function resourceProfileRetriever;
SlotSharingExecutionSlotAllocator(
PhysicalSlotProvider slotProvider,
boolean slotWillBeOccupiedIndefinitely,
SlotSharingStrategy slotSharingStrategy,
SharedSlotProfileRetrieverFactory sharedSlotProfileRetrieverFactory,
PhysicalSlotRequestBulkChecker bulkChecker,
Duration allocationTimeout,
Function resourceProfileRetriever) {
this.slotProvider = checkNotNull(slotProvider);
this.slotWillBeOccupiedIndefinitely = slotWillBeOccupiedIndefinitely;
this.slotSharingStrategy = checkNotNull(slotSharingStrategy);
this.sharedSlotProfileRetrieverFactory = checkNotNull(sharedSlotProfileRetrieverFactory);
this.bulkChecker = checkNotNull(bulkChecker);
this.allocationTimeout = checkNotNull(allocationTimeout);
this.resourceProfileRetriever = checkNotNull(resourceProfileRetriever);
this.sharedSlots = new IdentityHashMap<>();
this.slotProvider.disableBatchSlotRequestTimeoutCheck();
}
@Override
public Map allocateSlotsFor(
List executionAttemptIds) {
final Map vertexIdToExecutionId = new HashMap<>();
executionAttemptIds.forEach(
executionId ->
vertexIdToExecutionId.put(executionId.getExecutionVertexId(), executionId));
checkState(
vertexIdToExecutionId.size() == executionAttemptIds.size(),
"SlotSharingExecutionSlotAllocator does not support one execution vertex to have multiple concurrent executions");
final List vertexIds =
executionAttemptIds.stream()
.map(ExecutionAttemptID::getExecutionVertexId)
.collect(Collectors.toList());
return allocateSlotsForVertices(vertexIds).stream()
.collect(
Collectors.toMap(
vertexAssignment ->
vertexIdToExecutionId.get(
vertexAssignment.getExecutionVertexId()),
vertexAssignment ->
new ExecutionSlotAssignment(
vertexIdToExecutionId.get(
vertexAssignment.getExecutionVertexId()),
vertexAssignment.getLogicalSlotFuture())));
}
/**
* Creates logical {@link SlotExecutionVertexAssignment}s from physical shared slots.
*
* The allocation has the following steps:
*
*
* - Map the executions to {@link ExecutionSlotSharingGroup}s using {@link
* SlotSharingStrategy}
*
- Check which {@link ExecutionSlotSharingGroup}s already have shared slot
*
- For all involved {@link ExecutionSlotSharingGroup}s which do not have a shared slot
* yet:
*
- Create a {@link SlotProfile} future using {@link SharedSlotProfileRetriever} and then
*
- Allocate a physical slot from the {@link PhysicalSlotProvider}
*
- Create a shared slot based on the returned physical slot futures
*
- Allocate logical slot futures for the executions from all corresponding shared slots.
*
- If a physical slot request fails, associated logical slot requests are canceled within
* the shared slot
*
- Generate {@link SlotExecutionVertexAssignment}s based on the logical slot futures and
* returns the results.
*
*
* @param executionVertexIds Execution vertices to allocate slots for
*/
private List allocateSlotsForVertices(
List executionVertexIds) {
SharedSlotProfileRetriever sharedSlotProfileRetriever =
sharedSlotProfileRetrieverFactory.createFromBulk(new HashSet<>(executionVertexIds));
Map> executionsByGroup =
executionVertexIds.stream()
.collect(
Collectors.groupingBy(
slotSharingStrategy::getExecutionSlotSharingGroup));
Map slots = new HashMap<>(executionsByGroup.size());
Set groupsToAssign = new HashSet<>(executionsByGroup.keySet());
Map assignedSlots =
tryAssignExistingSharedSlots(groupsToAssign);
slots.putAll(assignedSlots);
groupsToAssign.removeAll(assignedSlots.keySet());
if (!groupsToAssign.isEmpty()) {
Map allocatedSlots =
allocateSharedSlots(groupsToAssign, sharedSlotProfileRetriever);
slots.putAll(allocatedSlots);
groupsToAssign.removeAll(allocatedSlots.keySet());
Preconditions.checkState(groupsToAssign.isEmpty());
}
Map assignments =
allocateLogicalSlotsFromSharedSlots(slots, executionsByGroup);
// we need to pass the slots map to the createBulk method instead of using the allocator's
// 'sharedSlots'
// because if any physical slots have already failed, their shared slots have been removed
// from the allocator's 'sharedSlots' by failed logical slots.
SharingPhysicalSlotRequestBulk bulk = createBulk(slots, executionsByGroup);
bulkChecker.schedulePendingRequestBulkTimeoutCheck(bulk, allocationTimeout);
return executionVertexIds.stream().map(assignments::get).collect(Collectors.toList());
}
@Override
public void cancel(ExecutionAttemptID executionAttemptId) {
cancelLogicalSlotRequest(executionAttemptId.getExecutionVertexId(), null);
}
private void cancelLogicalSlotRequest(ExecutionVertexID executionVertexId, Throwable cause) {
ExecutionSlotSharingGroup executionSlotSharingGroup =
slotSharingStrategy.getExecutionSlotSharingGroup(executionVertexId);
checkNotNull(
executionSlotSharingGroup,
"There is no ExecutionSlotSharingGroup for ExecutionVertexID " + executionVertexId);
SharedSlot slot = sharedSlots.get(executionSlotSharingGroup);
if (slot != null) {
slot.cancelLogicalSlotRequest(executionVertexId, cause);
} else {
LOG.debug(
"There is no SharedSlot for ExecutionSlotSharingGroup of ExecutionVertexID {}",
executionVertexId);
}
}
private static Map
allocateLogicalSlotsFromSharedSlots(
Map slots,
Map> executionsByGroup) {
Map assignments = new HashMap<>();
for (Map.Entry> entry :
executionsByGroup.entrySet()) {
ExecutionSlotSharingGroup group = entry.getKey();
List executionIds = entry.getValue();
for (ExecutionVertexID executionId : executionIds) {
CompletableFuture logicalSlotFuture =
slots.get(group).allocateLogicalSlot(executionId);
SlotExecutionVertexAssignment assignment =
new SlotExecutionVertexAssignment(executionId, logicalSlotFuture);
assignments.put(executionId, assignment);
}
}
return assignments;
}
private Map tryAssignExistingSharedSlots(
Set executionSlotSharingGroups) {
Map assignedSlots =
new HashMap<>(executionSlotSharingGroups.size());
for (ExecutionSlotSharingGroup group : executionSlotSharingGroups) {
SharedSlot sharedSlot = sharedSlots.get(group);
if (sharedSlot != null) {
assignedSlots.put(group, sharedSlot);
}
}
return assignedSlots;
}
private Map allocateSharedSlots(
Set executionSlotSharingGroups,
SharedSlotProfileRetriever sharedSlotProfileRetriever) {
List slotRequests = new ArrayList<>();
Map allocatedSlots = new HashMap<>();
Map requestToGroup = new HashMap<>();
Map requestToPhysicalResources = new HashMap<>();
for (ExecutionSlotSharingGroup group : executionSlotSharingGroups) {
SlotRequestId physicalSlotRequestId = new SlotRequestId();
ResourceProfile physicalSlotResourceProfile = getPhysicalSlotResourceProfile(group);
SlotProfile slotProfile =
sharedSlotProfileRetriever.getSlotProfile(group, physicalSlotResourceProfile);
PhysicalSlotRequest request =
new PhysicalSlotRequest(
physicalSlotRequestId, slotProfile, slotWillBeOccupiedIndefinitely);
slotRequests.add(request);
requestToGroup.put(physicalSlotRequestId, group);
requestToPhysicalResources.put(physicalSlotRequestId, physicalSlotResourceProfile);
}
Map> allocateResult =
slotProvider.allocatePhysicalSlots(slotRequests);
allocateResult.forEach(
(slotRequestId, resultCompletableFuture) -> {
ExecutionSlotSharingGroup group = requestToGroup.get(slotRequestId);
CompletableFuture physicalSlotFuture =
resultCompletableFuture.thenApply(
PhysicalSlotRequest.Result::getPhysicalSlot);
SharedSlot slot =
new SharedSlot(
slotRequestId,
requestToPhysicalResources.get(slotRequestId),
group,
physicalSlotFuture,
slotWillBeOccupiedIndefinitely,
this::releaseSharedSlot);
allocatedSlots.put(group, slot);
Preconditions.checkState(!sharedSlots.containsKey(group));
sharedSlots.put(group, slot);
});
return allocatedSlots;
}
private void releaseSharedSlot(ExecutionSlotSharingGroup executionSlotSharingGroup) {
SharedSlot slot = sharedSlots.remove(executionSlotSharingGroup);
Preconditions.checkNotNull(slot);
Preconditions.checkState(
slot.isEmpty(),
"Trying to remove a shared slot with physical request id %s which has assigned logical slots",
slot.getPhysicalSlotRequestId());
slotProvider.cancelSlotRequest(
slot.getPhysicalSlotRequestId(),
new FlinkException(
"Slot is being returned from SlotSharingExecutionSlotAllocator."));
}
private ResourceProfile getPhysicalSlotResourceProfile(
ExecutionSlotSharingGroup executionSlotSharingGroup) {
if (!executionSlotSharingGroup.getResourceProfile().equals(ResourceProfile.UNKNOWN)) {
return executionSlotSharingGroup.getResourceProfile();
} else {
return executionSlotSharingGroup.getExecutionVertexIds().stream()
.reduce(
ResourceProfile.ZERO,
(r, e) -> r.merge(resourceProfileRetriever.apply(e)),
ResourceProfile::merge);
}
}
private SharingPhysicalSlotRequestBulk createBulk(
Map slots,
Map> executions) {
Map pendingRequests =
executions.keySet().stream()
.collect(
Collectors.toMap(
group -> group,
group ->
slots.get(group).getPhysicalSlotResourceProfile()));
SharingPhysicalSlotRequestBulk bulk =
new SharingPhysicalSlotRequestBulk(
executions, pendingRequests, this::cancelLogicalSlotRequest);
registerPhysicalSlotRequestBulkCallbacks(slots, executions.keySet(), bulk);
return bulk;
}
private static void registerPhysicalSlotRequestBulkCallbacks(
Map slots,
Iterable executions,
SharingPhysicalSlotRequestBulk bulk) {
for (ExecutionSlotSharingGroup group : executions) {
CompletableFuture slotContextFuture =
slots.get(group).getSlotContextFuture();
slotContextFuture.thenAccept(
physicalSlot -> bulk.markFulfilled(group, physicalSlot.getAllocationId()));
slotContextFuture.exceptionally(
t -> {
// clear the bulk to stop the fulfillability check
bulk.clearPendingRequests();
return null;
});
}
}
/** The slot assignment for an {@link ExecutionVertex}. */
private static class SlotExecutionVertexAssignment {
private final ExecutionVertexID executionVertexId;
private final CompletableFuture logicalSlotFuture;
SlotExecutionVertexAssignment(
ExecutionVertexID executionVertexId,
CompletableFuture logicalSlotFuture) {
this.executionVertexId = checkNotNull(executionVertexId);
this.logicalSlotFuture = checkNotNull(logicalSlotFuture);
}
ExecutionVertexID getExecutionVertexId() {
return executionVertexId;
}
CompletableFuture getLogicalSlotFuture() {
return logicalSlotFuture;
}
}
}