All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.scheduler.adaptive.CreatingExecutionGraph Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.scheduler.adaptive;

import org.apache.flink.api.common.JobStatus;
import org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.executiongraph.ExecutionGraph;
import org.apache.flink.runtime.executiongraph.ExecutionJobVertex;
import org.apache.flink.runtime.executiongraph.ExecutionVertex;
import org.apache.flink.runtime.jobgraph.JobType;
import org.apache.flink.runtime.jobgraph.jsonplan.JsonPlanGenerator;
import org.apache.flink.runtime.metrics.groups.JobManagerJobMetricGroup;
import org.apache.flink.runtime.scheduler.DefaultOperatorCoordinatorHandler;
import org.apache.flink.runtime.scheduler.ExecutionGraphHandler;
import org.apache.flink.runtime.scheduler.GlobalFailureHandler;
import org.apache.flink.runtime.scheduler.OperatorCoordinatorHandler;
import org.apache.flink.runtime.scheduler.adaptive.allocator.VertexParallelism;
import org.apache.flink.util.IterableUtils;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.concurrent.FutureUtils;

import org.slf4j.Logger;

import javax.annotation.Nullable;

import java.time.Duration;
import java.util.Collections;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.concurrent.ScheduledFuture;

/**
 * State which waits for the creation of the {@link ExecutionGraph}. If the creation fails, then the
 * state transitions to {@link Finished}. If the creation succeeds, then the system tries to assign
 * the required slots. If the set of available slots has changed so that the created {@link
 * ExecutionGraph} cannot be executed, the state transitions back into {@link WaitingForResources}.
 * If there are enough slots for the {@link ExecutionGraph} to run, the state transitions to {@link
 * Executing}.
 */
public class CreatingExecutionGraph extends StateWithoutExecutionGraph {

    private final Context context;
    private final OperatorCoordinatorHandlerFactory operatorCoordinatorHandlerFactory;

    private final @Nullable ExecutionGraph previousExecutionGraph;

    public CreatingExecutionGraph(
            Context context,
            CompletableFuture
                    executionGraphWithParallelismFuture,
            Logger logger,
            OperatorCoordinatorHandlerFactory operatorCoordinatorFactory,
            @Nullable ExecutionGraph previousExecutionGraph) {
        super(context, logger);
        this.context = context;
        this.operatorCoordinatorHandlerFactory = operatorCoordinatorFactory;

        FutureUtils.assertNoException(
                executionGraphWithParallelismFuture.handle(
                        (executionGraphWithVertexParallelism, throwable) -> {
                            context.runIfState(
                                    this,
                                    () ->
                                            handleExecutionGraphCreation(
                                                    executionGraphWithVertexParallelism, throwable),
                                    Duration.ZERO);
                            return null;
                        }));
        this.previousExecutionGraph = previousExecutionGraph;
    }

    private void handleExecutionGraphCreation(
            @Nullable ExecutionGraphWithVertexParallelism executionGraphWithVertexParallelism,
            @Nullable Throwable throwable) {
        if (throwable != null) {
            getLogger()
                    .info(
                            "Failed to go from {} to {} because the ExecutionGraph creation failed.",
                            CreatingExecutionGraph.class.getSimpleName(),
                            Executing.class.getSimpleName(),
                            throwable);
            context.goToFinished(context.getArchivedExecutionGraph(JobStatus.FAILED, throwable));
        } else {
            for (ExecutionVertex vertex :
                    executionGraphWithVertexParallelism.executionGraph.getAllExecutionVertices()) {
                vertex.getCurrentExecutionAttempt().transitionState(ExecutionState.SCHEDULED);
            }

            final AssignmentResult result =
                    context.tryToAssignSlots(executionGraphWithVertexParallelism);

            if (result.isSuccess()) {
                getLogger()
                        .debug(
                                "Successfully reserved and assigned the required slots for the ExecutionGraph.");
                final ExecutionGraph executionGraph = result.getExecutionGraph();
                final ExecutionGraphHandler executionGraphHandler =
                        new ExecutionGraphHandler(
                                executionGraph,
                                getLogger(),
                                context.getIOExecutor(),
                                context.getMainThreadExecutor());
                // Operator coordinator outlives the current state, so we need to use context as a
                // global failure handler.
                final OperatorCoordinatorHandler operatorCoordinatorHandler =
                        operatorCoordinatorHandlerFactory.create(executionGraph, context);
                operatorCoordinatorHandler.initializeOperatorCoordinators(
                        context.getMainThreadExecutor());
                final String updatedPlan =
                        JsonPlanGenerator.generatePlan(
                                executionGraph.getJobID(),
                                executionGraph.getJobName(),
                                JobType.STREAMING, // Adaptive scheduler works only with STREAMING
                                // jobs
                                () ->
                                        IterableUtils.toStream(
                                                        executionGraph.getVerticesTopologically())
                                                .map(ExecutionJobVertex::getJobVertex)
                                                .iterator(),
                                executionGraphWithVertexParallelism.getVertexParallelism());
                executionGraph.setJsonPlan(updatedPlan);

                executionGraph.transitionToRunning();
                operatorCoordinatorHandler.startAllOperatorCoordinators();

                context.goToExecuting(
                        result.getExecutionGraph(),
                        executionGraphHandler,
                        operatorCoordinatorHandler,
                        Collections.emptyList());
            } else {
                getLogger()
                        .debug(
                                "Failed to reserve and assign the required slots. Waiting for new resources.");
                context.goToWaitingForResources(previousExecutionGraph);
            }
        }
    }

    @Override
    public JobStatus getJobStatus() {
        return JobStatus.CREATED;
    }

    /** Context for the {@link CreatingExecutionGraph} state. */
    interface Context
            extends StateWithoutExecutionGraph.Context,
                    GlobalFailureHandler,
                    StateTransitions.ToExecuting,
                    StateTransitions.ToWaitingForResources {

        /**
         * Runs the given action after a delay if the state at this time equals the expected state.
         *
         * @param expectedState expectedState describes the required state at the time of running
         *     the action
         * @param action action to run if the expected state equals the actual state
         * @param delay delay after which to run the action
         * @return a ScheduledFuture representing pending completion of the task
         */
        ScheduledFuture runIfState(State expectedState, Runnable action, Duration delay);

        /**
         * Try to assign slots to the created {@link ExecutionGraph}. If it is possible, then this
         * method returns a successful {@link AssignmentResult} which contains the assigned {@link
         * ExecutionGraph}. If not, then the assignment result is a failure.
         *
         * @param executionGraphWithVertexParallelism executionGraphWithVertexParallelism to assign
         *     slots to resources
         * @return {@link AssignmentResult} representing the result of the assignment
         */
        AssignmentResult tryToAssignSlots(
                ExecutionGraphWithVertexParallelism executionGraphWithVertexParallelism);

        /**
         * Gets the I/O executor.
         *
         * @return the I/O executor
         */
        Executor getIOExecutor();

        /**
         * Gets the main thread executor.
         *
         * @return the main thread executor
         */
        ComponentMainThreadExecutor getMainThreadExecutor();

        /**
         * Gets the {@link JobManagerJobMetricGroup}.
         *
         * @return the metric group
         */
        JobManagerJobMetricGroup getMetricGroup();
    }

    @FunctionalInterface
    interface OperatorCoordinatorHandlerFactory {

        /**
         * Creates a new {@link OperatorCoordinatorHandler}. This interface is primarily intended
         * for easier testing.
         *
         * @param executionGraph Current execution graph, that contains operator coordinators that
         *     we want to start.
         * @param globalFailureHandler Global failure handler.
         * @return An {@link OperatorCoordinatorHandler} instance.
         */
        OperatorCoordinatorHandler create(
                ExecutionGraph executionGraph, GlobalFailureHandler globalFailureHandler);
    }

    /**
     * Class representing the assignment result of the slots to the {@link ExecutionGraph}. The
     * assignment is either successful or not possible. If it is successful, the assignment also
     * contains the assigned {@link ExecutionGraph}.
     */
    static final class AssignmentResult {

        private static final AssignmentResult NOT_POSSIBLE = new AssignmentResult(null);

        @Nullable private final ExecutionGraph executionGraph;

        private AssignmentResult(@Nullable ExecutionGraph executionGraph) {
            this.executionGraph = executionGraph;
        }

        boolean isSuccess() {
            return executionGraph != null;
        }

        ExecutionGraph getExecutionGraph() {
            Preconditions.checkState(
                    isSuccess(), "Can only return the ExecutionGraph if it is a success.");
            return executionGraph;
        }

        static AssignmentResult success(ExecutionGraph executionGraph) {
            return new AssignmentResult(
                    Preconditions.checkNotNull(
                            executionGraph,
                            "AssignmentResult.success expects a non-null ExecutionGraph."));
        }

        static AssignmentResult notPossible() {
            return NOT_POSSIBLE;
        }
    }

    /** Factory for the {@link CreatingExecutionGraph} state. */
    static class Factory implements StateFactory {

        private final Context context;

        private final CompletableFuture
                executionGraphWithParallelismFuture;

        private final @Nullable ExecutionGraph previousExecutionGraph;

        private final Logger log;

        Factory(
                Context context,
                CompletableFuture
                        executionGraphWithParallelismFuture,
                Logger log,
                @Nullable ExecutionGraph previousExecutionGraph) {
            this.context = context;
            this.executionGraphWithParallelismFuture = executionGraphWithParallelismFuture;
            this.log = log;
            this.previousExecutionGraph = previousExecutionGraph;
        }

        @Override
        public Class getStateClass() {
            return CreatingExecutionGraph.class;
        }

        @Override
        public CreatingExecutionGraph getState() {
            return new CreatingExecutionGraph(
                    context,
                    executionGraphWithParallelismFuture,
                    log,
                    DefaultOperatorCoordinatorHandler::new,
                    previousExecutionGraph);
        }
    }

    static class ExecutionGraphWithVertexParallelism {
        private final ExecutionGraph executionGraph;

        private final JobSchedulingPlan jobSchedulingPlan;

        private ExecutionGraphWithVertexParallelism(
                ExecutionGraph executionGraph, JobSchedulingPlan jobSchedulingPlan) {
            this.executionGraph = executionGraph;
            this.jobSchedulingPlan = jobSchedulingPlan;
        }

        public static ExecutionGraphWithVertexParallelism create(
                ExecutionGraph executionGraph, JobSchedulingPlan vertexParallelism) {
            return new ExecutionGraphWithVertexParallelism(executionGraph, vertexParallelism);
        }

        public ExecutionGraph getExecutionGraph() {
            return executionGraph;
        }

        public VertexParallelism getVertexParallelism() {
            return jobSchedulingPlan.getVertexParallelism();
        }

        public JobSchedulingPlan getJobSchedulingPlan() {
            return jobSchedulingPlan;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy