// org.apache.flink.runtime.scheduler.LocalInputPreferredSlotSharingStrategy (Maven / Gradle / Ivy)
//
// Go to download
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.scheduler;

import org.apache.flink.runtime.instance.SlotSharingGroupId;
import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint;
import org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup;
import org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup;
import org.apache.flink.runtime.scheduler.adapter.DefaultExecutionTopology;
import org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup;
import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
import org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex;
import org.apache.flink.runtime.scheduler.strategy.SchedulingTopology;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;

/**
 * This strategy tries to reduce remote data exchanges. Execution vertices, which are connected and
 * belong to the same SlotSharingGroup, tend to be put in the same ExecutionSlotSharingGroup.
 * Co-location constraints will be respected.
 */
class LocalInputPreferredSlotSharingStrategy
        implements SlotSharingStrategy, SchedulingTopologyListener {

    private final Map executionSlotSharingGroupMap;

    private final Set logicalSlotSharingGroups;

    private final Set coLocationGroups;

    LocalInputPreferredSlotSharingStrategy(
            final SchedulingTopology topology,
            final Set logicalSlotSharingGroups,
            final Set coLocationGroups) {

        this.logicalSlotSharingGroups = checkNotNull(logicalSlotSharingGroups);
        this.coLocationGroups = checkNotNull(coLocationGroups);

        this.executionSlotSharingGroupMap =
                new ExecutionSlotSharingGroupBuilder(
                                topology, logicalSlotSharingGroups, coLocationGroups)
                        .build();
        topology.registerSchedulingTopologyListener(this);
    }

    @Override
    public ExecutionSlotSharingGroup getExecutionSlotSharingGroup(
            final ExecutionVertexID executionVertexId) {
        return executionSlotSharingGroupMap.get(executionVertexId);
    }

    @Override
    public Set getExecutionSlotSharingGroups() {
        return new HashSet<>(executionSlotSharingGroupMap.values());
    }

    @Override
    public void notifySchedulingTopologyUpdated(
            SchedulingTopology schedulingTopology, List newExecutionVertices) {

        final Map newMap =
                new LocalInputPreferredSlotSharingStrategy.ExecutionSlotSharingGroupBuilder(
                                schedulingTopology, logicalSlotSharingGroups, coLocationGroups)
                        .build();

        for (ExecutionVertexID vertexId : newMap.keySet()) {
            final ExecutionSlotSharingGroup newEssg = newMap.get(vertexId);
            final ExecutionSlotSharingGroup oldEssg = executionSlotSharingGroupMap.get(vertexId);
            if (oldEssg == null) {
                executionSlotSharingGroupMap.put(vertexId, newEssg);
            } else {
                // ensures that existing slot sharing groups are not changed
                checkState(
                        oldEssg.getExecutionVertexIds().equals(newEssg.getExecutionVertexIds()),
                        "Existing ExecutionSlotSharingGroups are changed after topology update");
            }
        }
    }

    static class Factory implements SlotSharingStrategy.Factory {

        public LocalInputPreferredSlotSharingStrategy create(
                final SchedulingTopology topology,
                final Set logicalSlotSharingGroups,
                final Set coLocationGroups) {

            return new LocalInputPreferredSlotSharingStrategy(
                    topology, logicalSlotSharingGroups, coLocationGroups);
        }
    }

    private static class ExecutionSlotSharingGroupBuilder {
        private final SchedulingTopology topology;

        private final Map slotSharingGroupMap;

        private final Map coLocationGroupMap;

        private final Map
                executionSlotSharingGroupMap;

        private final Map
                constraintToExecutionSlotSharingGroupMap;

        private final Map>
                executionSlotSharingGroups;

        /**
         * A JobVertex only belongs to one {@link SlotSharingGroup}. A SlotSharingGroup is
         * corresponding to a set of {@link ExecutionSlotSharingGroup}s. We can maintain available
         * ExecutionSlotSharingGroups for each JobVertex.
         *
         * Once an ExecutionSlotSharingGroup is created, it becomes available for all JobVertices
         * in the corresponding SlotSharingGroup in the beginning.
         *
         * 
Once a SchedulingExecutionVertex is added to the ExecutionSlotSharingGroup, the group
         * is no longer available for other SchedulingExecutionVertices with the same JobVertexID.
         *
         * 
Here we use {@link LinkedHashSet} to reserve the order the same as the
         * SchedulingVertices are traversed.
         */
        private final Map>
                availableGroupsForJobVertex;

        /**
         * Maintains the candidate {@link ExecutionSlotSharingGroup}s for every {@link
         * ConsumedPartitionGroup}. The ConsumedPartitionGroup represents a group of partitions that
         * is consumed by the same ExecutionVertices. These ExecutionVertices belong to one consumer
         * JobVertex. Thus, we can say, a ConsumedPartitionGroup is corresponding to one consumer
         * JobVertex.
         *
         * 
This mapping is used to find an available producer ExecutionSlotSharingGroup for the
         * consumer vertex. If a candidate group is available for this consumer vertex, it will be
         * assigned to this vertex.
         *
         * 
The candidate groups are computed in {@link
         * #computeAllCandidateGroupsForConsumedPartitionGroup} when the ConsumedPartitionGroup is
         * traversed for the first time.
         *
         * 
Here we use {@link LinkedHashSet} to reserve the order the same as the
         * SchedulingVertices are traversed.
         */
        private final Map>
                candidateGroupsForConsumedPartitionGroup;

        private ExecutionSlotSharingGroupBuilder(
                final SchedulingTopology topology,
                final Set logicalSlotSharingGroups,
                final Set coLocationGroups) {

            this.topology = checkNotNull(topology);

            this.slotSharingGroupMap = new HashMap<>();
            for (SlotSharingGroup slotSharingGroup : logicalSlotSharingGroups) {
                for (JobVertexID jobVertexId : slotSharingGroup.getJobVertexIds()) {
                    slotSharingGroupMap.put(jobVertexId, slotSharingGroup);
                }
            }

            this.coLocationGroupMap = new HashMap<>();
            for (CoLocationGroup coLocationGroup : coLocationGroups) {
                for (JobVertexID jobVertexId : coLocationGroup.getVertexIds()) {
                    coLocationGroupMap.put(jobVertexId, coLocationGroup);
                }
            }

            executionSlotSharingGroupMap = new HashMap<>();
            constraintToExecutionSlotSharingGroupMap = new HashMap<>();
            executionSlotSharingGroups = new HashMap<>();
            availableGroupsForJobVertex = new HashMap<>();
            candidateGroupsForConsumedPartitionGroup = new IdentityHashMap<>();
        }

        /**
         * Build ExecutionSlotSharingGroups for all vertices in the topology. The
         * ExecutionSlotSharingGroup of a vertex is determined in order below:
         *
         * 
1. try finding an existing group of the corresponding co-location constraint.
         *
         * 
2. try finding an available group of its producer vertex if the producer is in the
         * same slot sharing group.
         *
         * 
3. try finding any available group.
         *
         * 4. create a new group.
         */
        private Map build() {
            final LinkedHashMap> allVertices =
                    getExecutionVertices();

            // loop on job vertices so that an execution vertex will not be added into a group
            // if that group better fits another execution vertex
            for (List executionVertices : allVertices.values()) {
                final List remaining =
                        tryFindOptimalAvailableExecutionSlotSharingGroupFor(executionVertices);

                findAvailableOrCreateNewExecutionSlotSharingGroupFor(remaining);

                updateConstraintToExecutionSlotSharingGroupMap(executionVertices);
            }

            return executionSlotSharingGroupMap;
        }

        /**
         * The vertices are topologically sorted since {@link DefaultExecutionTopology#getVertices}
         * are topologically sorted.
         */
        private LinkedHashMap> getExecutionVertices() {
            final LinkedHashMap> vertices =
                    new LinkedHashMap<>();
            for (SchedulingExecutionVertex executionVertex : topology.getVertices()) {
                final List executionVertexGroup =
                        vertices.computeIfAbsent(
                                executionVertex.getId().getJobVertexId(), k -> new ArrayList<>());
                executionVertexGroup.add(executionVertex);
            }
            return vertices;
        }

        private List tryFindOptimalAvailableExecutionSlotSharingGroupFor(
                final List executionVertices) {

            final List remaining = new ArrayList<>();
            for (SchedulingExecutionVertex executionVertex : executionVertices) {
                ExecutionSlotSharingGroup group =
                        tryFindAvailableCoLocatedExecutionSlotSharingGroupFor(executionVertex);

                if (group == null) {
                    group = tryFindAvailableProducerExecutionSlotSharingGroupFor(executionVertex);
                }

                if (group == null) {
                    remaining.add(executionVertex);
                } else {
                    addVertexToExecutionSlotSharingGroup(executionVertex, group);
                }
            }

            return remaining;
        }

        private ExecutionSlotSharingGroup tryFindAvailableCoLocatedExecutionSlotSharingGroupFor(
                final SchedulingExecutionVertex executionVertex) {

            final ExecutionVertexID executionVertexId = executionVertex.getId();
            final CoLocationGroup coLocationGroup =
                    coLocationGroupMap.get(executionVertexId.getJobVertexId());
            if (coLocationGroup != null) {
                final CoLocationConstraint constraint =
                        coLocationGroup.getLocationConstraint(executionVertexId.getSubtaskIndex());

                return constraintToExecutionSlotSharingGroupMap.get(constraint);
            } else {
                return null;
            }
        }

        private ExecutionSlotSharingGroup tryFindAvailableProducerExecutionSlotSharingGroupFor(
                final SchedulingExecutionVertex executionVertex) {

            final ExecutionVertexID executionVertexId = executionVertex.getId();

            for (ConsumedPartitionGroup consumedPartitionGroup :
                    executionVertex.getConsumedPartitionGroups()) {

                Set candidateGroups =
                        candidateGroupsForConsumedPartitionGroup.computeIfAbsent(
                                consumedPartitionGroup,
                                group ->
                                        computeAllCandidateGroupsForConsumedPartitionGroup(
                                                executionVertexId.getJobVertexId(), group));

                Iterator candidateIterator = candidateGroups.iterator();

                while (candidateIterator.hasNext()) {
                    ExecutionSlotSharingGroup candidateGroup = candidateIterator.next();
                    // There are two cases for this candidate group:
                    //
                    // 1. The group is available for this vertex, and it will be assigned to this
                    // vertex;
                    // 2. The group is not available for this vertex, because it's already assigned
                    // to another vertex with the same JobVertexID.
                    //
                    // No matter what case it is, the candidate group is no longer a candidate and
                    // should be removed.
                    candidateIterator.remove();
                    if (isExecutionSlotSharingGroupAvailableForVertex(
                            candidateGroup, executionVertexId)) {
                        return candidateGroup;
                    }
                }
            }

            return null;
        }

        private boolean isExecutionSlotSharingGroupAvailableForVertex(
                ExecutionSlotSharingGroup executionSlotSharingGroup, ExecutionVertexID vertexId) {

            Set availableGroupsForCurrentVertex =
                    availableGroupsForJobVertex.get(vertexId.getJobVertexId());

            return availableGroupsForCurrentVertex != null
                    && availableGroupsForCurrentVertex.contains(executionSlotSharingGroup);
        }

        private boolean inSameLogicalSlotSharingGroup(
                final JobVertexID jobVertexId1, final JobVertexID jobVertexId2) {

            return Objects.equals(
                    getSlotSharingGroup(jobVertexId1).getSlotSharingGroupId(),
                    getSlotSharingGroup(jobVertexId2).getSlotSharingGroupId());
        }

        private SlotSharingGroup getSlotSharingGroup(final JobVertexID jobVertexId) {
            // slot sharing group of a vertex would never be null in production
            return checkNotNull(slotSharingGroupMap.get(jobVertexId));
        }

        private void addVertexToExecutionSlotSharingGroup(
                final SchedulingExecutionVertex vertex, final ExecutionSlotSharingGroup group) {

            ExecutionVertexID executionVertexId = vertex.getId();
            group.addVertex(executionVertexId);
            executionSlotSharingGroupMap.put(executionVertexId, group);

            // The ExecutionSlotSharingGroup is no longer available for the JobVertex
            Set availableExecutionSlotSharingGroups =
                    availableGroupsForJobVertex.get(executionVertexId.getJobVertexId());
            if (availableExecutionSlotSharingGroups != null) {
                availableExecutionSlotSharingGroups.remove(group);
            }
        }

        private void findAvailableOrCreateNewExecutionSlotSharingGroupFor(
                final List executionVertices) {

            for (SchedulingExecutionVertex executionVertex : executionVertices) {

                ExecutionSlotSharingGroup group =
                        tryFindAvailableExecutionSlotSharingGroupFor(executionVertex);

                if (group == null) {
                    group = createNewExecutionSlotSharingGroup(executionVertex.getId());
                }

                addVertexToExecutionSlotSharingGroup(executionVertex, group);
            }
        }

        private ExecutionSlotSharingGroup tryFindAvailableExecutionSlotSharingGroupFor(
                SchedulingExecutionVertex executionVertex) {

            Set availableGroupsForCurrentVertex =
                    availableGroupsForJobVertex.get(executionVertex.getId().getJobVertexId());

            if (availableGroupsForCurrentVertex != null
                    && !availableGroupsForCurrentVertex.isEmpty()) {
                return availableGroupsForCurrentVertex.iterator().next();
            }

            return null;
        }

        private ExecutionSlotSharingGroup createNewExecutionSlotSharingGroup(
                ExecutionVertexID executionVertexId) {
            final SlotSharingGroup slotSharingGroup =
                    getSlotSharingGroup(executionVertexId.getJobVertexId());
            final List correspondingExecutionSlotSharingGroups =
                    executionSlotSharingGroups.computeIfAbsent(
                            slotSharingGroup.getSlotSharingGroupId(), k -> new ArrayList<>());

            final ExecutionSlotSharingGroup newGroup = new ExecutionSlotSharingGroup();
            newGroup.setResourceProfile(slotSharingGroup.getResourceProfile());

            correspondingExecutionSlotSharingGroups.add(newGroup);

            // Once a new ExecutionSlotSharingGroup is created, it's available for all JobVertices
            // in this SlotSharingGroup
            for (JobVertexID jobVertexId : slotSharingGroup.getJobVertexIds()) {
                Set availableExecutionSlotSharingGroups =
                        availableGroupsForJobVertex.computeIfAbsent(
                                jobVertexId, ignore -> new LinkedHashSet<>());
                availableExecutionSlotSharingGroups.add(newGroup);
            }

            return newGroup;
        }

        private void updateConstraintToExecutionSlotSharingGroupMap(
                final List executionVertices) {

            for (SchedulingExecutionVertex executionVertex : executionVertices) {
                final ExecutionVertexID executionVertexId = executionVertex.getId();
                final CoLocationGroup coLocationGroup =
                        coLocationGroupMap.get(executionVertexId.getJobVertexId());
                if (coLocationGroup != null) {
                    final CoLocationConstraint constraint =
                            coLocationGroup.getLocationConstraint(
                                    executionVertexId.getSubtaskIndex());

                    constraintToExecutionSlotSharingGroupMap.put(
                            constraint, executionSlotSharingGroupMap.get(executionVertexId));
                }
            }
        }

        private LinkedHashSet
                computeAllCandidateGroupsForConsumedPartitionGroup(
                        JobVertexID consumerJobVertexId,
                        ConsumedPartitionGroup consumedPartitionGroup) {

            // We tend to reserve the order of ExecutionSlotSharingGroups as they are traversed
            // topologically
            final LinkedHashSet candidateExecutionSlotSharingGroups =
                    new LinkedHashSet<>();

            JobVertexID producerJobVertexId =
                    topology.getResultPartition(consumedPartitionGroup.getFirst())
                            .getProducer()
                            .getId()
                            .getJobVertexId();

            // Check if the producer JobVertex and the consumer JobVertex are in the same
            // SlotSharingGroup
            if (inSameLogicalSlotSharingGroup(producerJobVertexId, consumerJobVertexId)) {

                // Iterate over the producer ExecutionVertices of all the partitions in the
                // ConsumedPartitionGroup
                for (IntermediateResultPartitionID consumedPartition : consumedPartitionGroup) {

                    ExecutionVertexID producerExecutionVertexId =
                            topology.getResultPartition(consumedPartition).getProducer().getId();

                    ExecutionSlotSharingGroup assignedGroupForProducerExecutionVertex =
                            executionSlotSharingGroupMap.get(producerExecutionVertexId);
                    checkNotNull(assignedGroupForProducerExecutionVertex);

                    candidateExecutionSlotSharingGroups.add(
                            assignedGroupForProducerExecutionVertex);
                }
            }

            return candidateExecutionSlotSharingGroups;
        }
    }
}