All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.state.gemini.engine.snapshot.SnapshotManagerImpl Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.snapshot;

import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.base.IntSerializer;
import org.apache.flink.api.common.typeutils.base.LongSerializer;
import org.apache.flink.api.common.typeutils.base.MapSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.runtime.TupleSerializer;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.core.fs.FileStatus;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.fs.FileCleaner;
import org.apache.flink.runtime.state.gemini.engine.fs.FileIDImpl;
import org.apache.flink.runtime.state.gemini.engine.fs.FileManager;
import org.apache.flink.runtime.state.gemini.engine.memstore.WriteBufferManager;
import org.apache.flink.util.Preconditions;

import org.apache.flink.shaded.guava18.com.google.common.util.concurrent.ThreadFactoryBuilder;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

/**
 * Implementation of {@link SnapshotManager}.
 */
public class SnapshotManagerImpl implements SnapshotManager {

	private static final Logger LOG = LoggerFactory.getLogger(SnapshotManagerImpl.class);

	public static final String SNAPSHOT_DIR = "snapshot";

	public static final String SNAPSHOT_FILE_PREFIX = "snapshot";

	public static final String SNAPSHOT_FILE_SEPERATOR = "-";

	private final boolean localSnapshotEnabled;

	private final FileManager localFileManager;

	private final FileManager dfsFileManager;

	private boolean needToBreakLineage;

	private final WriteBufferManager writeBufferManager;

	private final SortedMap completedSnapshots;

	private final SortedMap runningSnapshots;

	private final SortedSet runningSnapshotAccessNumber;

	private volatile long minRunningSnapshotAccessNumber;

	private final ExecutorService snapshotExecutor;

	private final GContext gContext;

	private final FileCleaner fileCleaner;

	/** Snapshot manager wide lock to safeguard the snapshot updates */
	private final Object lock = new Object();

	public SnapshotManagerImpl(
		GContext gContext,
		WriteBufferManager writeBufferManager,
		FileManager localFileManager,
		FileManager dfsFileManager) {
		this.gContext = gContext;
		this.writeBufferManager = writeBufferManager;
		this.localFileManager = localFileManager;
		this.dfsFileManager = dfsFileManager;
		this.completedSnapshots = new TreeMap<>();
		this.runningSnapshots = new TreeMap<>();
		this.runningSnapshotAccessNumber = new TreeSet<>();
		this.minRunningSnapshotAccessNumber = Long.MAX_VALUE;
		this.localSnapshotEnabled = gContext.getGConfiguration().isLocalSnapshotEnabled();
		String prefix = gContext.getGConfiguration().getExcetorPrefixName();
		ThreadFactory namedThreadFactory = new ThreadFactoryBuilder().setNameFormat(prefix + "geminiMainSnapshot-%d").build();
		this.snapshotExecutor = new ThreadPoolExecutor(1,
			1,
			0L,
			TimeUnit.MILLISECONDS,
			new LinkedBlockingQueue<>(Short.MAX_VALUE),
			namedThreadFactory);
		this.fileCleaner = gContext.getSupervisor().getFileCleaner();
		LOG.info("SnapshotManager is created.");
	}

	@Override
	public boolean isNeedToBreakLineage() {
		return needToBreakLineage;
	}

	/**
	 * Note this method should be called before restoreLineage.
	 */
	@Override
	public void setNeedToBreakLineage(boolean needToBreakLineage) {
		this.needToBreakLineage = needToBreakLineage;
	}

	@Override
	public void startSnapshot(BackendSnapshotMeta backendSnapshotMeta) {
		synchronized (lock) {
			gContext.checkDBStatus();
			long checkpointId = backendSnapshotMeta.getCheckpointId();
			Preconditions.checkArgument(!runningSnapshots.containsKey(checkpointId), checkpointId + " is already running.");
			gContext.increaseCurVersion();
			// increment and record access number to protect files used by this snapshot will not be deleted when DB discards them.
			long accessNumber = gContext.incrementAndGetAccessNumber();
			runningSnapshotAccessNumber.add(accessNumber);
			minRunningSnapshotAccessNumber = runningSnapshotAccessNumber.first();

			long startTime = System.currentTimeMillis();
			LOG.info("GeminiDB start checkpoint {}, start time {}, access number {}.",
				checkpointId,
				startTime,
				accessNumber);

			try {
				SnapshotOperation snapshotOperation = localSnapshotEnabled ?
					new LocalAndDFSSnapshotOperation(gContext, this, dfsFileManager, localFileManager) :
					new DFSSnapshotOperation(gContext, this, dfsFileManager);

				PendingSnapshot pendingSnapshot = snapshotOperation.createPendingSnapshot(
					backendSnapshotMeta, accessNumber);
				runningSnapshots.put(checkpointId, pendingSnapshot);

				SnapshotCompletableFuture snapshotCompletableFuture = pendingSnapshot.getResultFuture();
				snapshotCompletableFuture.incRunningTask();
				writeBufferManager.doSnapshot(snapshotOperation);
				snapshotCompletableFuture.decRunningTask();

				pendingSnapshot.getSnapshotStat().setSyncStartTime(startTime);
				pendingSnapshot.getSnapshotStat().setAsyncStartTime(System.currentTimeMillis());
			} catch (Throwable e) {
				// fow now we catch everything and rethrow to see if we should do some error handle later.
				runningSnapshotAccessNumber.remove(accessNumber);
				minRunningSnapshotAccessNumber = !runningSnapshotAccessNumber.isEmpty() ?
					runningSnapshotAccessNumber.first() : Long.MAX_VALUE;
				throw e;
			}
		}
	}

	@Override
	public void endSnapshot(long checkpointId, Throwable throwable) {
		synchronized (lock) {
			PendingSnapshot pendingSnapshot = runningSnapshots.remove(checkpointId);
			if (pendingSnapshot != null) {
				pendingSnapshot.getSnapshotStat().setCompleteTime(System.currentTimeMillis());
				Set dataFileIDs = new HashSet<>();
				if (throwable == null && !pendingSnapshot.isCanceled()) {
					// add data files reference if snapshot successfully.
					for (int id : pendingSnapshot.getFileMapping().keySet()) {
						dfsFileManager.incSnapshotReference(new FileIDImpl(id));
						dataFileIDs.add(id);
					}

					CompletedSnapshot completedSnapshot = new CompletedSnapshot(checkpointId,
						pendingSnapshot.getSnapshotMetaPath().toUri().toString(),
						dataFileIDs);
					completedSnapshots.put(checkpointId, completedSnapshot);
					LOG.info("GeminiDB finished checkpoint {}, SnapshotStat {}",
						checkpointId, pendingSnapshot.getSnapshotStat());
				} else {
					if (!pendingSnapshot.isCanceled()) {
						LOG.warn("GeminiDB fail to complete checkpoint {}, exception {}", checkpointId, throwable);
					}

					// TODO currently, set pending checkpoint as cancelled would not interrupt the async checkpoint phase.
					// pending snapshot just canceled.
					discardCheckpointMetaFile(pendingSnapshot.getSnapshotMetaPath().toUri().toString());
				}

				pendingSnapshot.releaseResource();
				// update the access number
				runningSnapshotAccessNumber.remove(pendingSnapshot.getAccessNumber());
				minRunningSnapshotAccessNumber = !runningSnapshotAccessNumber.isEmpty() ?
					runningSnapshotAccessNumber.first() : Long.MAX_VALUE;

			} else {
				LOG.warn("checkpoint {} is not running, and can't be ended.", checkpointId);
			}
		}
	}

	@Override
	public long getMinRunningSnapshotAccessNumber() {
		return minRunningSnapshotAccessNumber;
	}

	/**
	 * Note this method should be called before endSnapshot for the same checkpoint.
	 */
	@Override
	public PendingSnapshot getPendingSnapshot(long checkpointId) {
		PendingSnapshot pendingSnapshot = runningSnapshots.get(checkpointId);
		if (pendingSnapshot == null) {
			throw new GeminiRuntimeException("there is no pending snapshot " + checkpointId);
		}
		return pendingSnapshot;
	}

	@Override
	public ExecutorService getSnapshotExecutor() {
		return snapshotExecutor;
	}

	@Override
	public void notifySnapshotComplete(long snapshotId) {
		// nothing to do.
	}

	@Override
	public void notifySnapshotAbort(long snapshotId) {
		CompletedSnapshot snapshotToAbort = null;
		boolean runningSnapshotCanceled = false;
		synchronized (lock) {
			PendingSnapshot pendingSnapshot = runningSnapshots.get(snapshotId);
			if (pendingSnapshot != null) {
				pendingSnapshot.resultFuture.setEndSnapshot();
				pendingSnapshot.setCanceled(true);
				runningSnapshotCanceled = true;
			}

			if (!runningSnapshotCanceled) {
				snapshotToAbort = completedSnapshots.remove(snapshotId);
			}
		}

		if (snapshotToAbort != null) {
			discardCompletedSnapshot(snapshotToAbort);
		}
	}

	@Override
	public void notifySnapshotSubsume(long snapshotId) {
		Set snapshotsToAbort = new HashSet<>();
		synchronized (lock) {
			Iterator> iterator = completedSnapshots.entrySet().iterator();
			while (iterator.hasNext()) {
				Map.Entry entry = iterator.next();
				if (entry.getKey() <= snapshotId) {
					iterator.remove();
					snapshotsToAbort.add(entry.getValue());
				} else {
					break;
				}
			}
		}
		for (CompletedSnapshot completedSnapshot : snapshotsToAbort) {
			discardCompletedSnapshot(completedSnapshot);
		}
	}

	@Override
	public Map restore(
		long snapshotId,
		Map fileMapping,
		String restoredBasePath) {
		Map snapshots;
		if (!needToBreakLineage) {
			snapshots = loadSnapshots(restoredBasePath, Collections.singleton(snapshotId));
			RestoredSnapshot restoredSnapshot = new RestoredSnapshot(
				snapshotId,
				getDFSSnapshotMetaPath(new Path(restoredBasePath), snapshotId).toUri().toString(),
				fileMapping);
			snapshots.put(snapshotId, restoredSnapshot);
			restoreSnapshots(snapshots);
			LOG.info("restore snapshot manager successfully with {} snapshots: {}", snapshots.size(), snapshots.keySet());
		} else {
			snapshots = Collections.emptyMap();
			LOG.info("no snapshot is restored because lineage needs to be broken");
		}
		return snapshots;
	}

	@Override
	public void close() throws IOException {
		synchronized (lock) {
			snapshotExecutor.shutdownNow();
			LOG.info("SnapshotManager is closed");
			runningSnapshotAccessNumber.clear();
			runningSnapshots.clear();
			completedSnapshots.clear();
		}
	}

	@VisibleForTesting
	Map getCompletedSnapshots() {
		return Collections.unmodifiableMap(completedSnapshots);
	}

	@VisibleForTesting
	Map getRunningSnapshots() {
		return Collections.unmodifiableMap(runningSnapshots);
	}

	private void restoreSnapshots(Map snapshots) {
		synchronized (lock) {
			for (Map.Entry entry : snapshots.entrySet()) {
				long checkpointId = entry.getKey();
				RestoredSnapshot restoredSnapshot = entry.getValue();
				completedSnapshots.put(checkpointId,
					new CompletedSnapshot(checkpointId,
						restoredSnapshot.getMetaFilePath(),
						restoredSnapshot.getFileMapping().keySet()));
			}
		}
	}

	private Map loadSnapshots(
		String restoredDBPath,
		Set excludeSnapshots) {
		Map snapshots = new HashMap<>();
		Path metaDirPath = new Path(restoredDBPath, SNAPSHOT_DIR);
		FileStatus[] fileStatusArray;
		try {
			fileStatusArray = FileSystem.get(metaDirPath.toUri()).listStatus(metaDirPath);
		} catch (Exception e) {
			LOG.error("failed to list dir status for {} when loading snapshots, {}", metaDirPath, e);
			return snapshots;
		}
		if (fileStatusArray == null) {
			return snapshots;
		}
		for (FileStatus fileStatus : fileStatusArray) {
			Path path = fileStatus.getPath();
			String fileName = path.getName();
			long snapshotId;
			try {
				snapshotId = getSnapshotID(fileName);
			} catch (Exception e) {
				LOG.error("failed to get snapshot ID caused by {}", e);
				continue;
			}
			if (excludeSnapshots.contains(snapshotId)) {
				LOG.info("skip to load snapshot {}", snapshotId);
				continue;
			}
			try (SnapshotMetaFile.Reader reader = SnapshotMetaFile.getReader(path)) {
				// TODO checksum
				long fileSize = fileStatus.getLen();
				// record the offset of file mapping
				reader.seek(fileSize - 16);
				long fileMappingOffset = reader.readLong();
				reader.seek(fileMappingOffset);
				boolean hasFileMapping = reader.readBoolean();
				Preconditions.checkState(hasFileMapping, "file mapping should always exist.");
				int fileMappingSize = reader.readInt();
				// just read base path, but do not use it
				reader.readUTF();
				Map fileIDToPath = new HashMap<>();
				for (int i = 0; i < fileMappingSize; ++i) {
					String filePath = reader.readUTF();
					Integer id = reader.readInt();
					fileIDToPath.put(id, filePath);
				}
				snapshots.put(snapshotId, new RestoredSnapshot(
					snapshotId, path.toUri().toString(), fileIDToPath));
				LOG.info("successfully load snapshot {} with {} files", snapshotId, fileIDToPath.size());
			} catch (Exception e) {
				LOG.error("failed to load snapshot {}, {}", snapshotId, e);
			}
		}
		return snapshots;
	}

	public Path getDFSSnapshotMetaPath(Path basePath, long checkpointId) {
		String name = SNAPSHOT_FILE_PREFIX + SNAPSHOT_FILE_SEPERATOR + checkpointId;
		return new Path(basePath, new Path(SNAPSHOT_DIR, name));
	}

	public Path getLocalSnapshotMetaPath(Path basePath, long checkpointId) {
		String name = SNAPSHOT_FILE_PREFIX + SNAPSHOT_FILE_SEPERATOR + checkpointId;
		return new Path(basePath, name);
	}

	@SuppressWarnings("unchecked")
	public MapSerializer>> getFileMappingSerializer() {
		TupleSerializer> tuple2Serializer = new TupleSerializer<>(
			(Class>) (Class) Tuple2.class,
			new TypeSerializer[]{IntSerializer.INSTANCE, LongSerializer.INSTANCE}
		);
		MapSerializer> groupMapSerializer = new MapSerializer<>(
			IntSerializer.INSTANCE, tuple2Serializer);
		return new MapSerializer<>(IntSerializer.INSTANCE, groupMapSerializer);
	}

	private long getSnapshotID(String snapshotMetaName) {
		String[] splits = snapshotMetaName.split(SNAPSHOT_FILE_SEPERATOR);
		if (splits.length == 2 && SNAPSHOT_FILE_PREFIX.equals(splits[0])) {
			try {
				long snapshotID = Long.valueOf(splits[1]);
				if (snapshotID > 0) {
					return snapshotID;
				}
			} catch (Exception e) {
				// parse snapshot failed
			}
		}
		throw new IllegalArgumentException("invalid snapshot meta file name " + snapshotMetaName);
	}

	private void discardCompletedSnapshot(CompletedSnapshot completedSnapshot) {
		for (Integer fileId : completedSnapshot.getDataFileIDs()) {
			dfsFileManager.decSnapshotReference(new FileIDImpl(fileId));
		}

		discardCheckpointMetaFile(completedSnapshot.getMetaFilePath());
		LOG.info("Discard snapshot {} when this snapshot is notified as useless.", completedSnapshot.getCheckpointID());
	}

	private void discardCheckpointMetaFile(String metaFilePath) {
		try {
			fileCleaner.registerFilesToClean(Collections.singleton(metaFilePath));
		} catch (Exception e) {
			LOG.error("Failed to delete snapshot meta file " + metaFilePath, e);
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy