// org.apache.flink.runtime.jobmaster.JobMasterGateway Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.jobmaster;
import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.runtime.checkpoint.CheckpointCoordinatorGateway;
import org.apache.flink.runtime.clusterframework.types.AllocationID;
import org.apache.flink.runtime.clusterframework.types.ResourceID;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph;
import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.runtime.executiongraph.ExecutionJobVertex;
import org.apache.flink.runtime.executiongraph.ExecutionVertex;
import org.apache.flink.runtime.io.network.partition.ResultPartition;
import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobStatus;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobgraph.OperatorID;
import org.apache.flink.runtime.jobmaster.message.ClassloadingProps;
import org.apache.flink.runtime.messages.Acknowledge;
import org.apache.flink.runtime.messages.webmonitor.JobDetails;
import org.apache.flink.runtime.preaggregatedaccumulators.CommitAccumulator;
import org.apache.flink.runtime.registration.RegistrationResponse;
import org.apache.flink.runtime.resourcemanager.ResourceManagerId;
import org.apache.flink.runtime.rest.handler.legacy.backpressure.OperatorBackPressureStatsResponse;
import org.apache.flink.runtime.rest.messages.job.JobPendingSlotRequestDetail;
import org.apache.flink.runtime.rpc.FencedRpcGateway;
import org.apache.flink.runtime.rpc.RpcTimeout;
import org.apache.flink.runtime.taskexecutor.AccumulatorReport;
import org.apache.flink.runtime.taskexecutor.TaskExecutionStatus;
import org.apache.flink.runtime.taskexecutor.TaskExecutorReportResponse;
import org.apache.flink.runtime.taskexecutor.slot.SlotOffer;
import org.apache.flink.runtime.taskmanager.TaskExecutionState;
import org.apache.flink.runtime.taskmanager.TaskManagerLocation;
import org.apache.flink.runtime.update.JobUpdateRequest;
import javax.annotation.Nullable;
import java.io.Serializable;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.CompletableFuture;
/**
* {@link JobMaster} rpc gateway interface.
*/
public interface JobMasterGateway extends
CheckpointCoordinatorGateway,
FencedRpcGateway,
KvStateLocationOracle,
KvStateRegistryGateway {
/**
* Cancels the currently executed job.
*
* @param timeout of this operation
* @return Future acknowledge of the operation
*/
CompletableFuture cancel(@RpcTimeout Time timeout);
/**
* Cancel the currently executed job.
*
* @param timeout of this operation
* @return Future acknowledge if the cancellation was successful
*/
CompletableFuture stop(@RpcTimeout Time timeout);
/**
* Request the current JobGraph.
*
* @return A Future to current {@link JobGraph}
*/
CompletableFuture requestJobGraph(Time timeout);
/**
* Trigger update of the executed job.
*
* @param request indicating how to update the job
* @param timeout of this operation
* @return Future which is completed with {@link Acknowledge} once the update was successful
*/
CompletableFuture updateJob(JobUpdateRequest request, Time timeout);
/**
* Triggers rescaling of the executed job.
*
* @param newParallelism new parallelism of the job
* @param rescalingBehaviour defining how strict the rescaling has to be executed
* @param timeout of this operation
* @return Future which is completed with {@link Acknowledge} once the rescaling was successful
*/
CompletableFuture rescaleJob(
int newParallelism,
RescalingBehaviour rescalingBehaviour,
@RpcTimeout Time timeout);
/**
* Triggers rescaling of the given set of operators.
*
* @param operators set of operators which shall be rescaled
* @param newParallelism new parallelism of the given set of operators
* @param rescalingBehaviour defining how strict the rescaling has to be executed
* @param timeout of this operation
* @return Future which is completed with {@link Acknowledge} once the rescaling was successful
*/
CompletableFuture rescaleOperators(
Collection operators,
int newParallelism,
RescalingBehaviour rescalingBehaviour,
@RpcTimeout Time timeout);
/**
* Updates the task execution state for a given task.
*
* @param taskExecutionState New task execution state for a given task
* @return Future flag of the task execution state update result
*/
CompletableFuture updateTaskExecutionState(
final TaskExecutionState taskExecutionState);
/**
* Requests the next input split for a source operator of the {@link ExecutionJobVertex}.
* The next input split is sent back to the sender as a
* {@link SerializedInputSplit} message.
*
* @param vertexID The job vertex id
* @param operatorID The operator id
* @param executionAttempt The execution attempt id
* @return The future of the input split. If there is no further input split, will return an empty object.
*/
CompletableFuture requestNextInputSplit(
final JobVertexID vertexID,
final OperatorID operatorID,
final ExecutionAttemptID executionAttempt);
/**
* Requests the current state of the partition. The state of a
* partition is currently bound to the state of the producing execution.
*
* @param intermediateResultId The execution attempt ID of the task requesting the partition state.
* @param partitionId The partition ID of the partition to request the state of.
* @return The future of the partition state
*/
CompletableFuture requestPartitionState(
final IntermediateDataSetID intermediateResultId,
final ResultPartitionID partitionId);
/**
* Notifies the JobManager about available data for a produced partition.
*
* There is a call to this method for each {@link ExecutionVertex} instance once per produced
* {@link ResultPartition} instance, either when first producing data (for pipelined executions)
* or when all data has been produced (for staged executions).
*
*
The JobManager then can decide when to schedule the partition consumers of the given session.
*
* @param partitionID The partition which has already produced data
* @param timeout before the rpc call fails
* @return Future acknowledge of the schedule or update operation
*/
CompletableFuture scheduleOrUpdateConsumers(
final ResultPartitionID partitionID,
@RpcTimeout final Time timeout);
/**
* Disconnects the given {@link org.apache.flink.runtime.taskexecutor.TaskExecutor} from the
* {@link JobMaster}.
*
* @param resourceID identifying the TaskManager to disconnect
* @param cause for the disconnection of the TaskManager
* @return Future acknowledge once the JobMaster has been disconnected from the TaskManager
*/
CompletableFuture disconnectTaskManager(ResourceID resourceID, Exception cause);
/**
* Disconnects the resource manager from the job manager because of the given cause.
*
* @param resourceManagerId identifying the resource manager leader id
* @param cause of the disconnect
*/
void disconnectResourceManager(
final ResourceManagerId resourceManagerId,
final Exception cause);
/**
* Request the classloading props of this job.
*/
CompletableFuture requestClassloadingProps();
/**
* Offers the given slots to the job manager. The response contains the set of accepted slots.
*
* @param taskManagerId identifying the task manager
* @param slots to offer to the job manager
* @param timeout for the rpc call
* @return Future set of accepted slots.
*/
CompletableFuture> offerSlots(
final ResourceID taskManagerId,
final Collection slots,
@RpcTimeout final Time timeout);
/**
* Report existing tasks execution status after jm failover.
*
* @param taskManagerId the task manager id
* @param tasksExecutionStatus the tasks execution status
* @param timeout the timeout
* @return Future indicating whether status reporting is successful
*/
CompletableFuture reportTasksExecutionStatus(
final ResourceID taskManagerId,
final List tasksExecutionStatus,
@RpcTimeout final Time timeout);
/**
* Fails the slot with the given allocation id and cause.
*
* @param taskManagerId identifying the task manager
* @param allocationId identifying the slot to fail
* @param cause of the failing
*/
void failSlot(final ResourceID taskManagerId,
final AllocationID allocationId,
final Exception cause);
/**
* Registers the task manager at the job manager.
*
* @param taskManagerRpcAddress the rpc address of the task manager
* @param taskManagerLocation location of the task manager
* @param timeout for the rpc call
* @return Future registration response indicating whether the registration was successful or not
*/
CompletableFuture registerTaskManager(
final String taskManagerRpcAddress,
final TaskManagerLocation taskManagerLocation,
@RpcTimeout final Time timeout);
/**
* Sends the heartbeat to job manager from task manager.
*
* @param resourceID unique id of the task manager
* @param accumulatorReport report containing accumulator updates
*/
void heartbeatFromTaskManager(
final ResourceID resourceID,
final AccumulatorReport accumulatorReport);
/**
* Sends heartbeat request from the resource manager.
*
* @param resourceID unique id of the resource manager
*/
void heartbeatFromResourceManager(final ResourceID resourceID);
/**
* Request the details of the executed job.
*
* @param timeout for the rpc call
* @return Future details of the executed job
*/
CompletableFuture requestJobDetails(@RpcTimeout Time timeout);
/**
* Requests the current job status.
*
* @param timeout for the rpc call
* @return Future containing the current job status
*/
CompletableFuture requestJobStatus(@RpcTimeout Time timeout);
/**
* Requests the {@link ArchivedExecutionGraph} of the executed job.
*
* @param timeout for the rpc call
* @return Future which is completed with the {@link ArchivedExecutionGraph} of the executed job
*/
CompletableFuture requestJob(@RpcTimeout Time timeout);
/**
* Request the details of pending slot requests of the current job.
*
* @param timeout for the rpc call
* @return the list of pending slot requests.
*/
CompletableFuture> requestPendingSlotRequestDetails(@RpcTimeout Time timeout);
/**
* Triggers taking a savepoint of the executed job.
*
* @param targetDirectory to which to write the savepoint data or null if the
* default savepoint directory should be used
* @param timeout for the rpc call
* @return Future which is completed with the savepoint path once completed
*/
CompletableFuture triggerSavepoint(
@Nullable final String targetDirectory,
final boolean cancelJob,
@RpcTimeout final Time timeout);
/**
* Requests the statistics on operator back pressure.
*
* @param jobVertexId JobVertex for which the stats are requested.
* @return A Future to the {@link OperatorBackPressureStatsResponse} or {@code null} if the stats are
* not available (yet).
*/
CompletableFuture requestOperatorBackPressureStats(JobVertexID jobVertexId);
/**
* Notifies that the allocation has failed.
*
* @param allocationID the failed allocation id.
* @param cause the reason that the allocation failed
*/
void notifyAllocationFailure(AllocationID allocationID, Exception cause);
/**
* Commits a list of aggregated accumulator values.
*
* @param commitAccumulators accumulators to commit.
*/
void commitPreAggregatedAccumulator(List commitAccumulators);
/**
* Queries an aggregated accumulator with the specific name.
*
* @param name the name of the target accumulator.
* @return Future which is completed with the result of querying accumulator.
*/
CompletableFuture> queryPreAggregatedAccumulator(String name);
}