All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.runtime.state.TaskStateManagerImpl Maven / Gradle / Ivy

There is a newer version: 1.19.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state;

import org.apache.flink.api.common.JobID;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.checkpoint.CheckpointMetaData;
import org.apache.flink.runtime.checkpoint.CheckpointMetrics;
import org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptor;
import org.apache.flink.runtime.checkpoint.JobManagerTaskRestore;
import org.apache.flink.runtime.checkpoint.OperatorSubtaskState;
import org.apache.flink.runtime.checkpoint.PrioritizedOperatorSubtaskState;
import org.apache.flink.runtime.checkpoint.TaskStateSnapshot;
import org.apache.flink.runtime.checkpoint.channel.SequentialChannelStateReader;
import org.apache.flink.runtime.checkpoint.channel.SequentialChannelStateReaderImpl;
import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.runtime.jobgraph.OperatorID;
import org.apache.flink.runtime.state.changelog.ChangelogStateHandle;
import org.apache.flink.runtime.state.changelog.StateChangelogStorage;
import org.apache.flink.runtime.state.changelog.StateChangelogStorageView;
import org.apache.flink.runtime.taskmanager.CheckpointResponder;
import org.apache.flink.util.ExceptionUtils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

/**
 * This class is the default implementation of {@link TaskStateManager} and collaborates with the
 * job manager through {@link CheckpointResponder}) as well as a task-manager-local state store.
 * Like this, client code does not have to deal with the differences between remote or local state
 * on recovery because this class handles both cases transparently.
 *
 * 

Reported state is tagged by clients so that this class can properly forward to the right * receiver for the checkpointed state. */ public class TaskStateManagerImpl implements TaskStateManager { /** The logger for this class. */ private static final Logger LOG = LoggerFactory.getLogger(TaskStateManagerImpl.class); /** The id of the job for which this manager was created, can report, and recover. */ private final JobID jobId; /** The execution attempt id that this manager reports for. */ private final ExecutionAttemptID executionAttemptID; /** * The data given by the job manager to restore the job. This is null for a new job without * previous state. */ @Nullable private final JobManagerTaskRestore jobManagerTaskRestore; /** The local state store to which this manager reports local state snapshots. */ private final TaskLocalStateStore localStateStore; /** The changelog storage where the manager reads and writes the changelog */ @Nullable private final StateChangelogStorage stateChangelogStorage; private final TaskExecutorStateChangelogStoragesManager changelogStoragesManager; /** The checkpoint responder through which this manager can report to the job manager. */ private final CheckpointResponder checkpointResponder; private final SequentialChannelStateReader sequentialChannelStateReader; public TaskStateManagerImpl( @Nonnull JobID jobId, @Nonnull ExecutionAttemptID executionAttemptID, @Nonnull TaskLocalStateStore localStateStore, @Nullable StateChangelogStorage stateChangelogStorage, @Nonnull TaskExecutorStateChangelogStoragesManager changelogStoragesManager, @Nullable JobManagerTaskRestore jobManagerTaskRestore, @Nonnull CheckpointResponder checkpointResponder) { this( jobId, executionAttemptID, localStateStore, stateChangelogStorage, changelogStoragesManager, jobManagerTaskRestore, checkpointResponder, new SequentialChannelStateReaderImpl( jobManagerTaskRestore == null ? 
new TaskStateSnapshot() : jobManagerTaskRestore.getTaskStateSnapshot())); } public TaskStateManagerImpl( @Nonnull JobID jobId, @Nonnull ExecutionAttemptID executionAttemptID, @Nonnull TaskLocalStateStore localStateStore, @Nullable StateChangelogStorage stateChangelogStorage, @Nonnull TaskExecutorStateChangelogStoragesManager changelogStoragesManager, @Nullable JobManagerTaskRestore jobManagerTaskRestore, @Nonnull CheckpointResponder checkpointResponder, @Nonnull SequentialChannelStateReaderImpl sequentialChannelStateReader) { this.jobId = jobId; this.localStateStore = localStateStore; this.stateChangelogStorage = stateChangelogStorage; this.changelogStoragesManager = changelogStoragesManager; this.jobManagerTaskRestore = jobManagerTaskRestore; this.executionAttemptID = executionAttemptID; this.checkpointResponder = checkpointResponder; this.sequentialChannelStateReader = sequentialChannelStateReader; } @Override public void reportTaskStateSnapshots( @Nonnull CheckpointMetaData checkpointMetaData, @Nonnull CheckpointMetrics checkpointMetrics, @Nullable TaskStateSnapshot acknowledgedState, @Nullable TaskStateSnapshot localState) { long checkpointId = checkpointMetaData.getCheckpointId(); localStateStore.storeLocalState(checkpointId, localState); checkpointResponder.acknowledgeCheckpoint( jobId, executionAttemptID, checkpointId, checkpointMetrics, acknowledgedState); } @Override public void reportIncompleteTaskStateSnapshots( CheckpointMetaData checkpointMetaData, CheckpointMetrics checkpointMetrics) { checkpointResponder.reportCheckpointMetrics( jobId, executionAttemptID, checkpointMetaData.getCheckpointId(), checkpointMetrics); } @Override public InflightDataRescalingDescriptor getInputRescalingDescriptor() { if (jobManagerTaskRestore == null) { return InflightDataRescalingDescriptor.NO_RESCALE; } return jobManagerTaskRestore.getTaskStateSnapshot().getInputRescalingDescriptor(); } @Override public InflightDataRescalingDescriptor getOutputRescalingDescriptor() { if 
(jobManagerTaskRestore == null) { return InflightDataRescalingDescriptor.NO_RESCALE; } return jobManagerTaskRestore.getTaskStateSnapshot().getOutputRescalingDescriptor(); } public boolean isTaskDeployedAsFinished() { if (jobManagerTaskRestore == null) { return false; } return jobManagerTaskRestore.getTaskStateSnapshot().isTaskDeployedAsFinished(); } @Override public Optional getRestoreCheckpointId() { if (jobManagerTaskRestore == null) { // This happens only if no checkpoint to restore. return Optional.empty(); } return Optional.of(jobManagerTaskRestore.getRestoreCheckpointId()); } @Override public PrioritizedOperatorSubtaskState prioritizedOperatorState(OperatorID operatorID) { if (jobManagerTaskRestore == null) { return PrioritizedOperatorSubtaskState.emptyNotRestored(); } TaskStateSnapshot jobManagerStateSnapshot = jobManagerTaskRestore.getTaskStateSnapshot(); OperatorSubtaskState jobManagerSubtaskState = jobManagerStateSnapshot.getSubtaskStateByOperatorID(operatorID); if (jobManagerSubtaskState == null) { return PrioritizedOperatorSubtaskState.empty( jobManagerTaskRestore.getRestoreCheckpointId()); } long restoreCheckpointId = jobManagerTaskRestore.getRestoreCheckpointId(); TaskStateSnapshot localStateSnapshot = localStateStore.retrieveLocalState(restoreCheckpointId); localStateStore.pruneMatchingCheckpoints( (long checkpointId) -> checkpointId != restoreCheckpointId); List alternativesByPriority = Collections.emptyList(); if (localStateSnapshot != null) { OperatorSubtaskState localSubtaskState = localStateSnapshot.getSubtaskStateByOperatorID(operatorID); if (localSubtaskState != null) { alternativesByPriority = Collections.singletonList(localSubtaskState); } } LOG.debug( "Operator {} has remote state {} from job manager and local state alternatives {} from local " + "state store {}.", operatorID, jobManagerSubtaskState, alternativesByPriority, localStateStore); PrioritizedOperatorSubtaskState.Builder builder = new PrioritizedOperatorSubtaskState.Builder( 
jobManagerSubtaskState, alternativesByPriority, jobManagerTaskRestore.getRestoreCheckpointId()); return builder.build(); } @Nonnull @Override public LocalRecoveryConfig createLocalRecoveryConfig() { return localStateStore.getLocalRecoveryConfig(); } @Override public SequentialChannelStateReader getSequentialChannelStateReader() { return sequentialChannelStateReader; } @Nullable @Override public StateChangelogStorage getStateChangelogStorage() { return stateChangelogStorage; } @Nullable @Override public StateChangelogStorageView getStateChangelogStorageView( Configuration configuration, ChangelogStateHandle changelogStateHandle) { StateChangelogStorageView storageView = null; try { storageView = changelogStoragesManager.stateChangelogStorageViewForJob( jobId, configuration, changelogStateHandle); } catch (IOException e) { ExceptionUtils.rethrow(e); } return storageView; } /** Tracking when local state can be confirmed and disposed. */ @Override public void notifyCheckpointComplete(long checkpointId) throws Exception { localStateStore.confirmCheckpoint(checkpointId); } /** Tracking when some local state can be disposed. */ @Override public void notifyCheckpointAborted(long checkpointId) { localStateStore.abortCheckpoint(checkpointId); } @Override public void close() throws Exception { sequentialChannelStateReader.close(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy