All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.state.TaskLocalStateStoreImpl Maven / Gradle / Ivy

There is a newer version: 1.13.6
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.JobID;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.checkpoint.TaskStateSnapshot;
import org.apache.flink.runtime.clusterframework.types.AllocationID;
import org.apache.flink.runtime.jobgraph.JobVertexID;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.annotation.concurrent.GuardedBy;

import java.io.File;
import java.io.IOException;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executor;
import java.util.function.LongPredicate;

/**
 * Main implementation of a {@link TaskLocalStateStore}.
 */
public class TaskLocalStateStoreImpl implements OwnedTaskLocalStateStore {

	/** Logger for this class. */
	private static final Logger LOG = LoggerFactory.getLogger(TaskLocalStateStoreImpl.class);

	/** Dummy value to use instead of null to satisfy {@link ConcurrentHashMap}. */
	@VisibleForTesting
	static final TaskStateSnapshot NULL_DUMMY = new TaskStateSnapshot(0);

	/** JobID from the owning subtask. */
	@Nonnull
	private final JobID jobID;

	/** AllocationID of the owning slot. */
	@Nonnull
	private final AllocationID allocationID;

	/** JobVertexID of the owning subtask. */
	@Nonnull
	private final JobVertexID jobVertexID;

	/** Subtask index of the owning subtask. */
	@Nonnegative
	private final int subtaskIndex;

	/** The configured mode for local recovery. */
	@Nonnull
	private final LocalRecoveryConfig localRecoveryConfig;

	/** Executor that runs the discarding of released state objects. */
	@Nonnull
	private final Executor discardExecutor;

	/** Lock for synchronisation on the storage map and the discarded status. */
	@Nonnull
	private final Object lock;

	/** Status flag if this store was already discarded. */
	@GuardedBy("lock")
	private boolean disposed;

	/** Maps checkpoint ids to local TaskStateSnapshots. */
	@Nonnull
	@GuardedBy("lock")
	private final SortedMap storedTaskStateByCheckpointID;

	public TaskLocalStateStoreImpl(
		@Nonnull JobID jobID,
		@Nonnull AllocationID allocationID,
		@Nonnull JobVertexID jobVertexID,
		@Nonnegative int subtaskIndex,
		@Nonnull LocalRecoveryConfig localRecoveryConfig,
		@Nonnull Executor discardExecutor) {

		this(
			jobID,
			allocationID,
			jobVertexID,
			subtaskIndex,
			localRecoveryConfig,
			discardExecutor,
			new TreeMap<>(),
			new Object());
	}

	@VisibleForTesting
	TaskLocalStateStoreImpl(
		@Nonnull JobID jobID,
		@Nonnull AllocationID allocationID,
		@Nonnull JobVertexID jobVertexID,
		@Nonnegative int subtaskIndex,
		@Nonnull LocalRecoveryConfig localRecoveryConfig,
		@Nonnull Executor discardExecutor,
		@Nonnull SortedMap storedTaskStateByCheckpointID,
		@Nonnull Object lock) {

		this.jobID = jobID;
		this.allocationID = allocationID;
		this.jobVertexID = jobVertexID;
		this.subtaskIndex = subtaskIndex;
		this.discardExecutor = discardExecutor;
		this.localRecoveryConfig = localRecoveryConfig;
		this.storedTaskStateByCheckpointID = storedTaskStateByCheckpointID;
		this.lock = lock;
		this.disposed = false;
	}

	@Override
	public void storeLocalState(
		@Nonnegative long checkpointId,
		@Nullable TaskStateSnapshot localState) {

		if (localState == null) {
			localState = NULL_DUMMY;
		}

		if (LOG.isTraceEnabled()) {
			LOG.trace(
				"Stored local state for checkpoint {} in subtask ({} - {} - {}) : {}.",
				checkpointId, jobID, jobVertexID, subtaskIndex, localState);
		} else if (LOG.isDebugEnabled()) {
			LOG.debug(
				"Stored local state for checkpoint {} in subtask ({} - {} - {})",
				checkpointId, jobID, jobVertexID, subtaskIndex);
		}

		Map.Entry toDiscard = null;

		synchronized (lock) {
			if (disposed) {
				// we ignore late stores and simply discard the state.
				toDiscard = new AbstractMap.SimpleEntry<>(checkpointId, localState);
			} else {
				TaskStateSnapshot previous =
					storedTaskStateByCheckpointID.put(checkpointId, localState);

				if (previous != null) {
					toDiscard = new AbstractMap.SimpleEntry<>(checkpointId, previous);
				}
			}
		}

		if (toDiscard != null) {
			asyncDiscardLocalStateForCollection(Collections.singletonList(toDiscard));
		}
	}

	@Override
	@Nullable
	public TaskStateSnapshot retrieveLocalState(long checkpointID) {

		TaskStateSnapshot snapshot;

		synchronized (lock) {
			snapshot = storedTaskStateByCheckpointID.get(checkpointID);
		}

		if (snapshot != null) {
			if (LOG.isTraceEnabled()) {
				LOG.trace("Found registered local state for checkpoint {} in subtask ({} - {} - {}) : {}",
					checkpointID, jobID, jobVertexID, subtaskIndex, snapshot);
			} else if (LOG.isDebugEnabled()) {
				LOG.debug("Found registered local state for checkpoint {} in subtask ({} - {} - {})",
					checkpointID, jobID, jobVertexID, subtaskIndex);
			}
		} else {
			LOG.debug("Did not find registered local state for checkpoint {} in subtask ({} - {} - {})",
				checkpointID, jobID, jobVertexID, subtaskIndex);
		}

		return (snapshot != NULL_DUMMY) ? snapshot : null;
	}

	@Override
	@Nonnull
	public LocalRecoveryConfig getLocalRecoveryConfig() {
		return localRecoveryConfig;
	}

	@Override
	public void confirmCheckpoint(long confirmedCheckpointId) {

		LOG.debug("Received confirmation for checkpoint {} in subtask ({} - {} - {}). Starting to prune history.",
			confirmedCheckpointId, jobID, jobVertexID, subtaskIndex);

		pruneCheckpoints(
			(snapshotCheckpointId) -> snapshotCheckpointId < confirmedCheckpointId,
			true);

	}

	@Override
	public void pruneMatchingCheckpoints(@Nonnull LongPredicate matcher) {

		pruneCheckpoints(
			matcher,
			false);
	}

	/**
	 * Disposes the state of all local snapshots managed by this object.
	 */
	@Override
	public CompletableFuture dispose() {

		Collection> statesCopy;

		synchronized (lock) {
			disposed = true;
			statesCopy = new ArrayList<>(storedTaskStateByCheckpointID.entrySet());
			storedTaskStateByCheckpointID.clear();
		}

		return CompletableFuture.runAsync(
			() -> {
				// discard all remaining state objects.
				syncDiscardLocalStateForCollection(statesCopy);

				// delete the local state subdirectory that belong to this subtask.
				LocalRecoveryDirectoryProvider directoryProvider = localRecoveryConfig.getLocalStateDirectoryProvider();
				for (int i = 0; i < directoryProvider.allocationBaseDirsCount(); ++i) {
					File subtaskBaseDirectory = directoryProvider.selectSubtaskBaseDirectory(i);
					try {
						deleteDirectory(subtaskBaseDirectory);
					} catch (IOException e) {
						LOG.warn("Exception when deleting local recovery subtask base directory {} in subtask ({} - {} - {})",
							subtaskBaseDirectory, jobID, jobVertexID, subtaskIndex, e);
					}
				}
			},
			discardExecutor);
	}

	private void asyncDiscardLocalStateForCollection(Collection> toDiscard) {
		if (!toDiscard.isEmpty()) {
			discardExecutor.execute(() -> syncDiscardLocalStateForCollection(toDiscard));
		}
	}

	private void syncDiscardLocalStateForCollection(Collection> toDiscard) {
		for (Map.Entry entry : toDiscard) {
			discardLocalStateForCheckpoint(entry.getKey(), entry.getValue());
		}
	}

	/**
	 * Helper method that discards state objects with an executor and reports exceptions to the log.
	 */
	private void discardLocalStateForCheckpoint(long checkpointID, TaskStateSnapshot o) {

		if (LOG.isTraceEnabled()) {
			LOG.trace("Discarding local task state snapshot of checkpoint {} for subtask ({} - {} - {}).",
				checkpointID, jobID, jobVertexID, subtaskIndex);
		} else {
			LOG.debug("Discarding local task state snapshot {} of checkpoint {} for subtask ({} - {} - {}).",
				o, checkpointID, jobID, jobVertexID, subtaskIndex);
		}

		try {
			o.discardState();
		} catch (Exception discardEx) {
			LOG.warn("Exception while discarding local task state snapshot of checkpoint {} in subtask ({} - {} - {}).",
				checkpointID, jobID, jobVertexID, subtaskIndex, discardEx);
		}

		LocalRecoveryDirectoryProvider directoryProvider = localRecoveryConfig.getLocalStateDirectoryProvider();
		File checkpointDir = directoryProvider.subtaskSpecificCheckpointDirectory(checkpointID);

		LOG.debug("Deleting local state directory {} of checkpoint {} for subtask ({} - {} - {}).",
			checkpointDir, checkpointID, jobID, jobVertexID, subtaskIndex);

		try {
			deleteDirectory(checkpointDir);
		} catch (IOException ex) {
			LOG.warn("Exception while deleting local state directory of checkpoint {} in subtask ({} - {} - {}).",
				checkpointID, jobID, jobVertexID, subtaskIndex, ex);
		}
	}

	/**
	 * Helper method to delete a directory.
	 */
	private void deleteDirectory(File directory) throws IOException {
		Path path = new Path(directory.toURI());
		FileSystem fileSystem = path.getFileSystem();
		if (fileSystem.exists(path)) {
			fileSystem.delete(path, true);
		}
	}

	/**
	 * Pruning the useless checkpoints, it should be called only when holding the {@link #lock}.
	 */
	private void pruneCheckpoints(LongPredicate pruningChecker, boolean breakOnceCheckerFalse) {

		final List> toRemove = new ArrayList<>();

		synchronized (lock) {

			Iterator> entryIterator =
				storedTaskStateByCheckpointID.entrySet().iterator();

			while (entryIterator.hasNext()) {

				Map.Entry snapshotEntry = entryIterator.next();
				long entryCheckpointId = snapshotEntry.getKey();

				if (pruningChecker.test(entryCheckpointId)) {
					toRemove.add(snapshotEntry);
					entryIterator.remove();
				} else if (breakOnceCheckerFalse) {
					break;
				}
			}
		}

		asyncDiscardLocalStateForCollection(toRemove);
	}

	@Override
	public String toString() {
		return "TaskLocalStateStore{" +
			"jobID=" + jobID +
			", jobVertexID=" + jobVertexID +
			", allocationID=" + allocationID.toHexString() +
			", subtaskIndex=" + subtaskIndex +
			", localRecoveryConfig=" + localRecoveryConfig +
			", storedCheckpointIDs=" + storedTaskStateByCheckpointID.keySet() +
			'}';
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy