All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.runtime.scheduler.SlotSharingExecutionSlotAllocator Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.scheduler;

import org.apache.flink.api.common.time.Time;
import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
import org.apache.flink.runtime.clusterframework.types.SlotProfile;
import org.apache.flink.runtime.jobmaster.LogicalSlot;
import org.apache.flink.runtime.jobmaster.SlotRequestId;
import org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlot;
import org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotProvider;
import org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotRequest;
import org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotRequestBulkChecker;
import org.apache.flink.runtime.scheduler.SharedSlotProfileRetriever.SharedSlotProfileRetrieverFactory;
import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
import org.apache.flink.util.FlinkException;
import org.apache.flink.util.Preconditions;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.function.Function;
import java.util.stream.Collectors;

import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * Allocates {@link LogicalSlot}s from physical shared slots.
 *
 * 

The allocator maintains a shared slot for each {@link ExecutionSlotSharingGroup}. It allocates * a physical slot for the shared slot and then allocates logical slots from it for scheduled tasks. * The physical slot is lazily allocated for a shared slot, upon any hosted subtask asking for the * shared slot. Each subsequent sharing subtask allocates a logical slot from the existing shared * slot. The shared/physical slot can be released only if all the requested logical slots are * released or canceled. */ class SlotSharingExecutionSlotAllocator implements ExecutionSlotAllocator { private static final Logger LOG = LoggerFactory.getLogger(SlotSharingExecutionSlotAllocator.class); private final PhysicalSlotProvider slotProvider; private final boolean slotWillBeOccupiedIndefinitely; private final SlotSharingStrategy slotSharingStrategy; private final Map sharedSlots; private final SharedSlotProfileRetrieverFactory sharedSlotProfileRetrieverFactory; private final PhysicalSlotRequestBulkChecker bulkChecker; private final Time allocationTimeout; private final Function resourceProfileRetriever; SlotSharingExecutionSlotAllocator( PhysicalSlotProvider slotProvider, boolean slotWillBeOccupiedIndefinitely, SlotSharingStrategy slotSharingStrategy, SharedSlotProfileRetrieverFactory sharedSlotProfileRetrieverFactory, PhysicalSlotRequestBulkChecker bulkChecker, Time allocationTimeout, Function resourceProfileRetriever) { this.slotProvider = checkNotNull(slotProvider); this.slotWillBeOccupiedIndefinitely = slotWillBeOccupiedIndefinitely; this.slotSharingStrategy = checkNotNull(slotSharingStrategy); this.sharedSlotProfileRetrieverFactory = checkNotNull(sharedSlotProfileRetrieverFactory); this.bulkChecker = checkNotNull(bulkChecker); this.allocationTimeout = checkNotNull(allocationTimeout); this.resourceProfileRetriever = checkNotNull(resourceProfileRetriever); this.sharedSlots = new IdentityHashMap<>(); } /** * Creates logical {@link SlotExecutionVertexAssignment}s from physical 
shared slots. * *

The allocation has the following steps: * *

    *
  1. Map the executions to {@link ExecutionSlotSharingGroup}s using {@link * SlotSharingStrategy} *
  2. Check which {@link ExecutionSlotSharingGroup}s already have shared slot *
  3. For all involved {@link ExecutionSlotSharingGroup}s which do not have a shared slot * yet: *
  4. Create a {@link SlotProfile} future using {@link SharedSlotProfileRetriever} and then *
  5. Allocate a physical slot from the {@link PhysicalSlotProvider} *
  6. Create a shared slot based on the returned physical slot futures *
  7. Allocate logical slot futures for the executions from all corresponding shared slots. *
  8. If a physical slot request fails, associated logical slot requests are canceled within * the shared slot *
  9. Generate {@link SlotExecutionVertexAssignment}s based on the logical slot futures and * returns the results. *
* * @param executionVertexIds Execution vertices to allocate slots for */ @Override public List allocateSlotsFor( List executionVertexIds) { SharedSlotProfileRetriever sharedSlotProfileRetriever = sharedSlotProfileRetrieverFactory.createFromBulk(new HashSet<>(executionVertexIds)); Map> executionsByGroup = executionVertexIds.stream() .collect( Collectors.groupingBy( slotSharingStrategy::getExecutionSlotSharingGroup)); Map slots = executionsByGroup.keySet().stream() .map(group -> getOrAllocateSharedSlot(group, sharedSlotProfileRetriever)) .collect( Collectors.toMap( SharedSlot::getExecutionSlotSharingGroup, Function.identity())); Map assignments = allocateLogicalSlotsFromSharedSlots(slots, executionsByGroup); // we need to pass the slots map to the createBulk method instead of using the allocator's // 'sharedSlots' // because if any physical slots have already failed, their shared slots have been removed // from the allocator's 'sharedSlots' by failed logical slots. SharingPhysicalSlotRequestBulk bulk = createBulk(slots, executionsByGroup); bulkChecker.schedulePendingRequestBulkTimeoutCheck(bulk, allocationTimeout); return executionVertexIds.stream().map(assignments::get).collect(Collectors.toList()); } @Override public void cancel(ExecutionVertexID executionVertexId) { cancelLogicalSlotRequest(executionVertexId, null); } private void cancelLogicalSlotRequest(ExecutionVertexID executionVertexId, Throwable cause) { ExecutionSlotSharingGroup executionSlotSharingGroup = slotSharingStrategy.getExecutionSlotSharingGroup(executionVertexId); checkNotNull( executionSlotSharingGroup, "There is no ExecutionSlotSharingGroup for ExecutionVertexID " + executionVertexId); SharedSlot slot = sharedSlots.get(executionSlotSharingGroup); if (slot != null) { slot.cancelLogicalSlotRequest(executionVertexId, cause); } else { LOG.debug( "There is no SharedSlot for ExecutionSlotSharingGroup of ExecutionVertexID {}", executionVertexId); } } private static Map 
allocateLogicalSlotsFromSharedSlots( Map slots, Map> executionsByGroup) { Map assignments = new HashMap<>(); for (Map.Entry> entry : executionsByGroup.entrySet()) { ExecutionSlotSharingGroup group = entry.getKey(); List executionIds = entry.getValue(); for (ExecutionVertexID executionId : executionIds) { CompletableFuture logicalSlotFuture = slots.get(group).allocateLogicalSlot(executionId); SlotExecutionVertexAssignment assignment = new SlotExecutionVertexAssignment(executionId, logicalSlotFuture); assignments.put(executionId, assignment); } } return assignments; } private SharedSlot getOrAllocateSharedSlot( ExecutionSlotSharingGroup executionSlotSharingGroup, SharedSlotProfileRetriever sharedSlotProfileRetriever) { return sharedSlots.computeIfAbsent( executionSlotSharingGroup, group -> { SlotRequestId physicalSlotRequestId = new SlotRequestId(); ResourceProfile physicalSlotResourceProfile = getPhysicalSlotResourceProfile(group); SlotProfile slotProfile = sharedSlotProfileRetriever.getSlotProfile( group, physicalSlotResourceProfile); PhysicalSlotRequest physicalSlotRequest = new PhysicalSlotRequest( physicalSlotRequestId, slotProfile, slotWillBeOccupiedIndefinitely); CompletableFuture physicalSlotFuture = slotProvider .allocatePhysicalSlot(physicalSlotRequest) .thenApply(PhysicalSlotRequest.Result::getPhysicalSlot); return new SharedSlot( physicalSlotRequestId, physicalSlotResourceProfile, group, physicalSlotFuture, slotWillBeOccupiedIndefinitely, this::releaseSharedSlot); }); } private void releaseSharedSlot(ExecutionSlotSharingGroup executionSlotSharingGroup) { SharedSlot slot = sharedSlots.remove(executionSlotSharingGroup); Preconditions.checkNotNull(slot); Preconditions.checkState( slot.isEmpty(), "Trying to remove a shared slot with physical request id %s which has assigned logical slots", slot.getPhysicalSlotRequestId()); slotProvider.cancelSlotRequest( slot.getPhysicalSlotRequestId(), new FlinkException( "Slot is being returned from 
SlotSharingExecutionSlotAllocator.")); } private ResourceProfile getPhysicalSlotResourceProfile( ExecutionSlotSharingGroup executionSlotSharingGroup) { if (!executionSlotSharingGroup.getResourceProfile().equals(ResourceProfile.UNKNOWN)) { return executionSlotSharingGroup.getResourceProfile(); } else { return executionSlotSharingGroup.getExecutionVertexIds().stream() .reduce( ResourceProfile.ZERO, (r, e) -> r.merge(resourceProfileRetriever.apply(e)), ResourceProfile::merge); } } private SharingPhysicalSlotRequestBulk createBulk( Map slots, Map> executions) { Map pendingRequests = executions.keySet().stream() .collect( Collectors.toMap( group -> group, group -> slots.get(group).getPhysicalSlotResourceProfile())); SharingPhysicalSlotRequestBulk bulk = new SharingPhysicalSlotRequestBulk( executions, pendingRequests, this::cancelLogicalSlotRequest); registerPhysicalSlotRequestBulkCallbacks(slots, executions.keySet(), bulk); return bulk; } private static void registerPhysicalSlotRequestBulkCallbacks( Map slots, Iterable executions, SharingPhysicalSlotRequestBulk bulk) { for (ExecutionSlotSharingGroup group : executions) { CompletableFuture slotContextFuture = slots.get(group).getSlotContextFuture(); slotContextFuture.thenAccept( physicalSlot -> bulk.markFulfilled(group, physicalSlot.getAllocationId())); slotContextFuture.exceptionally( t -> { // clear the bulk to stop the fulfillability check bulk.clearPendingRequests(); return null; }); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy