All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.state.gemini.engine.fs.FileManagerImpl Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.fs;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.runtime.state.gemini.engine.ExceptionStat;
import org.apache.flink.runtime.state.gemini.engine.FTFileWriter;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.metrics.FileManagerMetrics;
import org.apache.flink.runtime.state.gemini.engine.page.DataPageUtil;
import org.apache.flink.runtime.state.gemini.engine.page.DfsDataPageUtil;
import org.apache.flink.util.Preconditions;

import org.apache.flink.shaded.guava18.com.google.common.util.concurrent.ThreadFactoryBuilder;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;

/**
 * Implementation of {@link FileManager}.
 */
public class FileManagerImpl implements FileManager {

	/** Class-wide SLF4J logger. */
	private static final Logger LOG = LoggerFactory.getLogger(FileManagerImpl.class);

	/** Engine context; source of the configuration, access numbers, metric groups and the supervisor. */
	private final GContext gContext;

	/** Identifier of this file manager; used in the metric group name and the executor thread name. */
	private final String fileManagerIdentifier;

	/** Base directory under which all files of this manager are created. */
	private final Path workingBasePath;

	/** Failure-count threshold handed to every {@link FTFileWriter} created by this manager. */
	private final Integer writerFailCountThreshold;

	/**
	 * Number of consecutive file writer errors at which {@link #isValid()} starts reporting
	 * this manager as invalid.
	 */
	private final Integer fileManagerFailCountThreshold;

	/**
	 * The interval, in milliseconds, after which creating a new file writer is retried once
	 * creation has been disabled because all previous file writers went bad.
	 */
	private final long retryCreateFileWriterInterval;

	/**
	 * Error status of the file writers; every access synchronizes on this tuple.
	 * f0 -> disableCreateFileWriterTimestamp: time of the first error since the last reset, -1 if none.
	 * f1 -> fileWriterContinuesErrorCount: number of errors since the last reset.
	 */
	private Tuple2 fileWriterErrorStatus = new Tuple2<>();

	/** Exception statistics passed to the file writers and registered with the exception metrics if present. */
	private ExceptionStat exceptionStat;

	/** Message of the exception raised when a writer is requested while this manager is invalid. */
	private final String invalidMessage;

	/**
	 * map of file id to file name.
	 */
	private final Map fileMapping;

	/**
	 * File id generator itself is not thread-safe, and we should synchronize it outside.
	 */
	private final FileIDGenerator fileIDGenerator;

	/**
	 * Identifier of backend uid, usually operator attempt id.
	 */
	private final String backendUID;

	/**
	 * Suffix of filename, just increase from zero.
	 */
	private final AtomicLong fileSuffix;

	/**
	 * Whether this file manager is used by snapshot.
	 */
	private final boolean snapshotStorage;

	private final long fileAliveTimeAfterNoDBReference;

	/**
	 * A concurrent set of files which are added to this set when their DB reference becomes 0. Files are
	 * removed from the set when the access number is larger than {@link FileMeta#discardAccessNumber}
	 * and the time is larger than {@link FileMeta#discardTimeStamp}. Note that files are still in
	 * {@link FileManagerImpl#fileMapping}, and may still be used by snapshots. This set may be concurrently
	 * accessed by two types of threads. One adds the file to this set when the DB reference becomes 0,
	 * and the other one is the thread where {@link DBDeletionCheckRunner} runs to remove the file from
	 * the set.
	 */
	private final SortedSet waitingDBDeletionFiles;

	/**
	 * A concurrent set of files who are not used by DB, but are still used by some snapshots, that's their
	 * {@link FileMeta#snapshotReference} are not 0. A {@link DBDeletionCheckRunner} will periodically
	 * check the files in {@link FileManagerImpl#waitingDBDeletionFiles}, and decide whether files
	 * should be moved to this set. Note files in this set are still in {@link FileManagerImpl#fileMapping}.
	 * This set may be concurrently accessed by two types of thread. The one is the thread where {@link DBDeletionCheckRunner}
	 * runs, and the other one is some threads where snapshot is discarded, eg. threads used to execute
	 * {@link org.apache.flink.runtime.state.gemini.engine.snapshot.SnapshotManager#notifySnapshotAbort} or
	 * {@link org.apache.flink.runtime.state.gemini.engine.snapshot.SnapshotManager#notifySnapshotSubsume}.
	 */
	private final Set waitingSnapshotDeletionFiles;

	/**
	 * A concurrent set of files who are not used by DB and snapshot, and have been removed from
	 * {@link FileManagerImpl#fileMapping}.
	 */
	private final Set markedDeletionFiles;

	/**
	 * Executor service to periodically check whether the files in waitingDBDeletionFiles can be moved to
	 * waitingSnapshotDeletionFiles.
	 */
	private final ScheduledThreadPoolExecutor fileDeletionCheckExecutor;

	/** Statistics of this file manager; registered with {@link #fileManagerMetrics} when metrics are enabled. */
	private final FileManagerStat fileManagerStat;

	/** Wall-clock time in milliseconds of the last statistics log line; 0 until the first one is printed. */
	private long lastPrintStatTime;

	/** Set to true by {@link #close()}; checked by the public operations before they do any work. */
	private volatile boolean closed;

	/** Page utility handed out by {@link #getDataPageUtil()}. */
	private final DataPageUtil dataPageUtil;

	/** Metrics wrapper; stays {@code null} when the context provides no file manager metric group. */
	@Nullable
	private FileManagerMetrics fileManagerMetrics;

	/**
	 * Creates a file manager that is not used as snapshot storage.
	 * Delegates to the four-argument constructor with {@code snapshotStorage = false}.
	 */
	public FileManagerImpl(GContext gContext, String fileManagerIdentifier, Path workingBasePath) {
		this(gContext, fileManagerIdentifier, workingBasePath, false);
	}

	/**
	 * Creates a file manager that uses a {@link DfsDataPageUtil} whose checksum setting is taken
	 * from the configuration of {@code gContext}.
	 */
	public FileManagerImpl(
		GContext gContext, String fileManagerIdentifier, Path workingBasePath, boolean snapshotStorage) {
		this(gContext, fileManagerIdentifier, workingBasePath, snapshotStorage, new DfsDataPageUtil(gContext.getGConfiguration().isChecksumEnable()));
	}

	/**
	 * Creates a file manager.
	 *
	 * <p>Besides initializing the bookkeeping sets and statistics, this registers metrics (when the
	 * context offers a file manager metric group) and creates, but does not start, the single-threaded
	 * executor used for deletion checks; see {@link #start()}.
	 *
	 * @param gContext engine context, must not be null
	 * @param fileManagerIdentifier identifier used in the metric group and thread names
	 * @param workingBasePath base directory for the files created by this manager
	 * @param snapshotStorage whether this file manager is used by snapshot
	 * @param dataPageUtil page utility returned by {@link #getDataPageUtil()}
	 */
	public FileManagerImpl(
		GContext gContext,
		String fileManagerIdentifier,
		Path workingBasePath,
		boolean snapshotStorage,
		DataPageUtil dataPageUtil) {
		this.gContext = Preconditions.checkNotNull(gContext);
		this.fileManagerIdentifier = fileManagerIdentifier;
		this.workingBasePath = workingBasePath;

		this.fileMapping = new ConcurrentHashMap<>();
		this.fileIDGenerator = new FileIDGenerator(
			gContext.getGConfiguration().getSubTaskIndex(), gContext.getGConfiguration().getNumParallelSubtasks());

		this.backendUID = gContext.getGConfiguration().getBackendUID();
		this.fileSuffix = new AtomicLong(0);
		this.snapshotStorage = snapshotStorage;
		this.fileAliveTimeAfterNoDBReference = gContext.getGConfiguration().getFileAliveTimeAfterNoDataReference();
		// Order by discard access number, then by discard timestamp, then by file id. The file id
		// is the final tie-breaker so that two distinct files never compare as equal. The deletion
		// checker only ever inspects the first element, so the timestamp must take part in the order.
		this.waitingDBDeletionFiles = new ConcurrentSkipListSet<>(
			Comparator.<FileMeta>comparingLong(FileMeta::getDiscardAccessNumber)
				.thenComparingLong(FileMeta::getDiscardTimeStamp)
				.thenComparingInt(fileMeta -> fileMeta.getFileId().get()));
		this.waitingSnapshotDeletionFiles = Collections.newSetFromMap(new ConcurrentHashMap<>());
		this.markedDeletionFiles = Collections.newSetFromMap(new ConcurrentHashMap<>());

		this.lastPrintStatTime = 0;
		this.fileManagerStat = new FileManagerStat();
		MetricGroup fileManagerMetricGroup = gContext.getFileManagerMetricGroup();
		if (fileManagerMetricGroup != null) {
			MetricGroup metricGroup = fileManagerMetricGroup.addGroup(fileManagerIdentifier);
			this.fileManagerMetrics = new FileManagerMetrics(metricGroup, gContext.getGConfiguration().getMetricSampleCount());
			fileManagerMetrics.register(fileManagerStat);
			fileManagerMetrics.registerUsedFile(fileMapping::size);
			fileManagerMetrics.registerWaitingDbDeletion(waitingDBDeletionFiles::size);
			fileManagerMetrics.registerWaitingSnapshotDeletion(waitingSnapshotDeletionFiles::size);
			fileManagerMetrics.registerMarkDeletionFile(markedDeletionFiles::size);
		}

		this.fileDeletionCheckExecutor = new ScheduledThreadPoolExecutor(1,
			new ThreadFactoryBuilder().setNameFormat(
				gContext.getGConfiguration().getExcetorPrefixName() + "FileManager-" + fileManagerIdentifier + "-%d").build());
		// make sure nothing keeps running or stays queued once the executor is shut down in close()
		this.fileDeletionCheckExecutor.setRemoveOnCancelPolicy(true);
		this.fileDeletionCheckExecutor.setContinueExistingPeriodicTasksAfterShutdownPolicy(false);
		this.fileDeletionCheckExecutor.setExecuteExistingDelayedTasksAfterShutdownPolicy(false);

		this.closed = false;

		this.writerFailCountThreshold = gContext.getGConfiguration().getWriterFailCountThreshold();
		this.fileManagerFailCountThreshold = gContext.getGConfiguration().getFileManagerFailCountThreshold();
		// The configured interval is multiplied by 1000 to obtain milliseconds; the long literal
		// keeps the multiplication from overflowing in int arithmetic.
		this.retryCreateFileWriterInterval = gContext.getGConfiguration().getFileManagerCreateFileWriterRetryInterval() * 1000L;
		this.fileWriterErrorStatus.f0 = -1L;
		this.fileWriterErrorStatus.f1 = 0;
		this.exceptionStat = new ExceptionStat();
		if (gContext.getExceptionMetrics() != null) {
			gContext.getExceptionMetrics().register(exceptionStat);
		}

		this.invalidMessage = "Can't create file writer anymore, because of exceed the threshold(" + this.fileManagerFailCountThreshold + ") of continues filewriter error";
		this.dataPageUtil = dataPageUtil;
		LOG.info("FileManager is created for {}", workingBasePath);
	}

	/**
	 * Schedules a {@link DBDeletionCheckRunner} on the deletion-check executor at a fixed rate,
	 * starting immediately and repeating every configured file deletion check interval (milliseconds).
	 */
	@Override
	public void start() {
		this.fileDeletionCheckExecutor.scheduleAtFixedRate(new DBDeletionCheckRunner(),
			0, gContext.getGConfiguration().getFileDeletionCheckInterval(), TimeUnit.MILLISECONDS);
		LOG.info("FileManager is started for {}", workingBasePath);
	}

	/** Returns the identifier this file manager was created with. */
	@Override
	public String getFileManagerIdentifier() {
		return fileManagerIdentifier;
	}

	/** Returns the base directory under which this manager creates its files. */
	@Override
	public Path getBasePath() {
		return workingBasePath;
	}

	/**
	 * Returns the path of the file with the given id; see {@link #getFilePath(int)}.
	 */
	@Override
	public String getFilePath(FileID fileID) {
		return getFilePath(fileID.get());
	}

	/**
	 * Looks up the path of a file by its integer id.
	 *
	 * @param fileId id of the file
	 * @return path of the file as recorded in the file mapping
	 * @throws NullPointerException if no file with this id is in the file mapping
	 */
	@Override
	public String getFilePath(int fileId) {
		final FileMeta meta = fileMapping.get(fileId);
		final String notFoundMessage = "file not in file mapping for " + fileId;
		Preconditions.checkNotNull(meta, notFoundMessage);
		return meta.getFilePath();
	}

	/**
	 * Extracts the file id encoded in the upper 32 bits of {@code address} and wraps it
	 * in a new {@link FileIDImpl}.
	 */
	@Override
	public FileID getFileID(long address) {
		return new FileIDImpl(getIDFromAddress(address));
	}

	/**
	 * Builds an address by placing the file id in the upper 32 bits and OR-ing in {@code offset}.
	 *
	 * @param fileID id of the file
	 * @param offset offset inside the file
	 * @return the combined address
	 */
	@Override
	public long getAddress(FileID fileID, long offset) {
		final long idInUpperWord = ((long) fileID.get()) << 32;
		return idInUpperWord | offset;
	}

	/**
	 * Returns the lower 32 bits of {@code address}, i.e. the offset part of the address.
	 */
	@Override
	public long getFileOffset(long address) {
		final long lowerWordMask = 0xFFFF_FFFFL;
		return address & lowerWordMask;
	}

	/** Returns whatever {@link FileIDGenerator#get()} currently reports; exposed for tests only. */
	@VisibleForTesting
	FileID getCurrentFileID() {
		return fileIDGenerator.get();
	}

	/** Returns the upper 32 bits of {@code address} as an int, i.e. the file id part of the address. */
	private int getIDFromAddress(long address) {
		final long upperWord = address >>> 32;
		return (int) upperWord;
	}

	/**
	 * Returns the reader of the file that {@code address} points into, creating and caching it in the
	 * file's {@link FileMeta} on first use.
	 *
	 * <p>Creation happens while holding the monitor of the {@link FileMeta}, after re-checking that no
	 * reader has been set in the meantime. If the manager turns out to be closed right after the
	 * reader was published, the reader is withdrawn and closed again.
	 *
	 * @param address address whose upper 32 bits hold the file id
	 * @return the reader for the file
	 * @throws NullPointerException if the file id is not in the file mapping
	 * @throws GeminiRuntimeException if this manager is closed or the reader cannot be created
	 */
	@Override
	public FileReader getFileReader(long address) {
		checkDBStatus();
		int id = getIDFromAddress(address);
		FileMeta fileMeta = fileMapping.get(id);
		Preconditions.checkNotNull(fileMeta, "file not in file mapping for file id " + id);
		FileReader fileReader = fileMeta.getFileReader();
		if (fileReader == null) {
			synchronized (fileMeta) {
				fileReader = fileMeta.getFileReader();
				if (fileReader == null) {
					GeminiInputStream inputStream = null;
					try {
						inputStream = new GeminiInputStream(new Path(fileMeta.getFilePath()));
						fileReader = new FileReaderImpl(inputStream, fileMeta);
						fileMeta.setFileReader(fileReader);
						if (closed) {
							fileMeta.setFileReader(null);
							LOG.warn("File manager has been closed, and close file reader for file {}", fileMeta);
							throw new GeminiRuntimeException("File manager has been closed");
						}
					} catch (Exception e) {
						// Exception may happen before we set file reader, so we need to close the stream
						// if exception happens
						try {
							if (fileReader != null) {
								fileReader.close();
							} else if (inputStream != null) {
								inputStream.close();
							}
						} catch (Exception innerException) {
							// the throwable is passed without a placeholder so that SLF4J logs its stack trace
							LOG.error("failed to close input stream {}", inputStream, innerException);
						}
						throw new GeminiRuntimeException("failed to create file reader for " + fileMeta, e);
					}
				}
			}
		}
		return fileReader;
	}

	/**
	 * Creates a writer for a brand-new file and registers the file in the file mapping with an
	 * initial DB reference of one.
	 *
	 * <p>On any failure the partially created writer or stream is closed, the file id is recycled,
	 * deletion of the file is attempted and the failure is counted before it is rethrown.
	 *
	 * @return the writer of the newly created file
	 * @throws GeminiRuntimeException if this manager is closed, is currently invalid (see
	 *     {@link #isValid()}), or the writer cannot be created
	 */
	@Override
	public FileWriter createNewFileWriter() {
		checkDBStatus();
		Tuple2 tuple = getNewFilePath();
		FileID fileID = tuple.f0;
		String filePath = tuple.f1;
		FileMeta fileMeta = new FileMeta(filePath, fileID);
		FileWriter fileWriter = null;
		GeminiOutputStream outputStream = null;
		try {
			if (!isValid()) {
				//TODO: #SR Currently we do support snapshot file manager shift and recovery only.
				//    When using deaggregative mode, we should not throw exception directly maybe.
				throw new IllegalStateException(invalidMessage);
			}
			outputStream = new GeminiOutputStream(new Path(filePath));
			fileWriter = new FTFileWriter(outputStream, this, fileID, filePath, writerFailCountThreshold, exceptionStat);
			fileMeta.setFileWriter(fileWriter);
			fileMeta.addAndGetDBReference(1);
			FileMeta oldFileMeta = fileMapping.putIfAbsent(fileID.get(), fileMeta);
			// Flink's Preconditions templates substitute %s placeholders, not {}
			Preconditions.checkState(oldFileMeta == null, "%s has existed", fileMeta);
			// close() may have run concurrently; withdraw the registration made above in that case
			if (closed && fileMapping.remove(fileID.get()) != null) {
				LOG.warn("File manager has been closed, and close file writer for file {}", fileMeta);
				throw new GeminiRuntimeException("File manager has been closed.");
			}
		} catch (Exception e) {
			// Exception may happen before we add the fileMeta to file mapping,
			// so we need to close the stream if exception happens
			try {
				if (fileWriter != null) {
					fileWriter.close();
				} else if (outputStream != null) {
					outputStream.close();
				}
			} catch (Exception innerException) {
				// the throwable is passed without a placeholder so that SLF4J logs its stack trace
				LOG.error("failed to close output stream, {}", outputStream, innerException);
			}
			recycleFileID(fileID);
			// try to delete file for safety
			deleteFile(filePath, false);
			fileManagerStat.addTotalFailCreateFile(1);
			throw new GeminiRuntimeException("failed to create new file writer for " + fileID, e);
		}
		fileManagerStat.addTotalCreatedFile(1);
		fileManagerStat.setMaxUsedFile(fileMapping.size());
		LOG.debug("create new file {}", fileMeta);
		return fileWriter;
	}

	/**
	 * Closes {@code fileWriter}, records the final file size and drops the DB reference of its file.
	 *
	 * <p>Whether the writer is still valid decides if the consecutive writer error count is reset or
	 * increased. Failures while reading the size or closing the writer are logged and do not
	 * prevent the reference from being released.
	 *
	 * <p>This method is not thread-safe for the same file writer, and we should guarantee the safety outside.
	 *
	 * @param fileWriter writer to close, must not be null
	 * @throws GeminiRuntimeException if this manager is closed
	 * @throws NullPointerException if the writer is null or its file is not in the file mapping
	 */
	@Override
	public void closeFileWriter(FileWriter fileWriter) {
		checkDBStatus();
		Preconditions.checkNotNull(fileWriter);
		FileID fileID = fileWriter.getFileID();
		FileMeta fileMeta = fileMapping.get(fileID.get());
		// Flink's Preconditions templates substitute %s placeholders, not {}
		Preconditions.checkNotNull(fileMeta, "file not in file mapping for %s", fileID);
		try {
			fileMeta.setFileSize(fileWriter.getSize());
		} catch (Exception e) {
			LOG.error("failed to get file size: {}", fileMeta, e);
		} finally {
			if (fileWriter.isValid()) {
				resetFileWriterErrorCount();
			} else {
				increaseFileWriterErrorCount();
			}
			try {
				fileWriter.close();
				LOG.debug("close file writer successfully: {}", fileMeta);
			} catch (Exception e) {
				LOG.error("failed to close file writer: file id {}", fileMeta, e);
			}
			fileMeta.setFileWriter(null);
			// the data size delta is 0 here; only the reference is released
			internalDecDBReference(fileMeta, gContext.getAccessNumber(), System.currentTimeMillis(), 0);
		}
	}

	/**
	 * Allocates a new file id and derives the path of the file that will carry it.
	 *
	 * <p>The path is {@code workingBasePath/backendUID-suffix}, where the suffix is taken from an
	 * ever-increasing counter. The id generator is accessed under its own monitor.
	 *
	 * @return tuple of (new file id, new file path)
	 */
	@VisibleForTesting
	Tuple2 getNewFilePath() {
		final FileID newFileID;
		synchronized (fileIDGenerator) {
			newFileID = fileIDGenerator.generate();
		}
		final long suffix = fileSuffix.getAndIncrement();
		final Path newPath = new Path(workingBasePath, backendUID + "-" + suffix);
		final String filePath = newPath.toUri().toString();
		LOG.debug("FileID {} with new path {} is ready to create.", newFileID, filePath);
		return Tuple2.of(newFileID, filePath);
	}

	/**
	 * Adds one DB reference and {@code dataSize} bytes of data to the file that {@code address}
	 * points into.
	 *
	 * @throws GeminiRuntimeException if this manager is closed
	 * @throws NullPointerException if the file is not in the file mapping
	 */
	@Override
	public void incDBReference(long address, long dataSize) {
		checkDBStatus();
		final int fileId = getIDFromAddress(address);
		final FileMeta target = fileMapping.get(fileId);
		Preconditions.checkNotNull(target);
		internalIncDBReference(target, dataSize);
	}

	/**
	 * Increments the DB reference of {@code fileMeta} by one and accounts {@code dataSize} on both
	 * the file and the manager-wide statistics.
	 *
	 * @throws IllegalStateException if the reference count is not positive after the increment
	 */
	private void internalIncDBReference(FileMeta fileMeta, long dataSize) {
		long ref = fileMeta.addAndGetDBReference(1);
		// sanity check; this is the DB (data) reference, not the snapshot reference
		Preconditions.checkState(ref > 0, "data reference should be positive");
		fileMeta.addAndGetDataSize(dataSize);
		fileManagerStat.addTotalDataSize(dataSize);
	}

	/**
	 * Removes one DB reference and {@code dataSize} bytes of data from the file that
	 * {@code address} points into, recording {@code accessNumber} and {@code ts} as discard marks.
	 *
	 * @throws GeminiRuntimeException if this manager is closed
	 * @throws NullPointerException if the file is not in the file mapping
	 */
	@Override
	public void decDBReference(long address, long accessNumber, long ts, long dataSize) {
		checkDBStatus();
		final int fileId = getIDFromAddress(address);
		final FileMeta target = fileMapping.get(fileId);
		Preconditions.checkNotNull(target);
		internalDecDBReference(target, accessNumber, ts, dataSize);
	}

	/**
	 * Decrements the DB reference of {@code fileMeta} by one and subtracts {@code dataSize} from the
	 * file and from the manager-wide statistics. When the reference reaches 0 the file is queued in
	 * {@link #waitingDBDeletionFiles}.
	 *
	 * @throws IllegalStateException if the reference becomes negative, or the file could not be
	 *     added to the waiting set
	 */
	private void internalDecDBReference(FileMeta fileMeta, long accessNumber, long ts, long dataSize) {
		// the discard marks are updated before the file can enter the sorted waiting set,
		// whose ordering is derived from them
		fileMeta.updateDiscardAccessNumberAndTimestamp(accessNumber, ts);
		fileMeta.addAndGetDataSize(-dataSize);
		long ref = fileMeta.addAndGetDBReference(-1);
		// sanity check
		Preconditions.checkState(ref >= 0, "data reference should not be negative");
		if (ref == 0) {
			boolean success = waitingDBDeletionFiles.add(fileMeta);
			Preconditions.checkState(success, "failed to add file to waitingDBDeletionFiles " + fileMeta);
			LOG.debug("add file to waitingDBDeletionFiles {}", fileMeta);
		}
		fileManagerStat.addTotalDataSize(-dataSize);
	}

	/**
	 * Adds one snapshot reference to the given file. Does nothing beyond the closed check when this
	 * manager is not used as snapshot storage.
	 *
	 * @throws GeminiRuntimeException if this manager is closed
	 * @throws NullPointerException if the file is not in the file mapping
	 */
	@Override
	public void incSnapshotReference(FileID fileID) {
		checkDBStatus();
		if (!snapshotStorage) {
			return;
		}
		final FileMeta target = fileMapping.get(fileID.get());
		Preconditions.checkNotNull(target);
		final long snapshotRefs = target.addAndGetSnapshotReference(1);
		Preconditions.checkState(snapshotRefs > 0, "snapshot reference should be positive");
	}

	/**
	 * Removes one snapshot reference from the given file. When the count reaches 0 and the file is
	 * waiting in {@link #waitingSnapshotDeletionFiles}, the file is marked for deletion. Does nothing
	 * beyond the closed check when this manager is not used as snapshot storage.
	 *
	 * @throws GeminiRuntimeException if this manager is closed
	 * @throws NullPointerException if the file is not in the file mapping
	 * @throws IllegalStateException if the snapshot reference becomes negative
	 */
	@Override
	public void decSnapshotReference(FileID fileID) {
		checkDBStatus();
		if (!snapshotStorage) {
			return;
		}
		final FileMeta target = fileMapping.get(fileID.get());
		Preconditions.checkNotNull(target);
		final long snapshotRefs = target.addAndGetSnapshotReference(-1);
		Preconditions.checkState(snapshotRefs >= 0, "snapshot reference should not be negative");
		// Only the caller that actually removes the file from the waiting set may mark it,
		// which avoids a concurrent deletion in DBDeletionCheckRunner.
		if (snapshotRefs == 0 && waitingSnapshotDeletionFiles.remove(target)) {
			markFileDeletion(target);
			LOG.debug("Mark file deletion by snapshot {}", target);
		}
	}

	/**
	 * Drains {@link #markedDeletionFiles}: every path seen by the iteration is moved into the
	 * returned set and removed from the internal one.
	 *
	 * @return a new set holding the drained file paths
	 */
	@Override
	public Set getMarkedDeletionFiles() {
		Set drained = new HashSet<>();
		for (Iterator cursor = markedDeletionFiles.iterator(); cursor.hasNext(); ) {
			drained.add(cursor.next());
			cursor.remove();
		}
		return drained;
	}

	/**
	 * Rebuilds the file mapping from restored metadata.
	 *
	 * <p>Every restored entry becomes a {@link FileMeta} in {@link #fileMapping}; entries whose DB
	 * reference is 0 are additionally put into {@link #waitingSnapshotDeletionFiles}. The restored
	 * file ids are handed to the id generator and the statistics are updated afterwards.
	 *
	 * @param restoredFileMapping restored metadata keyed by file id
	 * @throws GeminiRuntimeException if a restored file id is already present in the file mapping
	 * @throws IllegalStateException if an entry has neither a DB reference nor a snapshot reference
	 */
	@Override
	public void restore(Map restoredFileMapping) {
		Set fileIDS = new HashSet<>();
		long totalDataSize = 0;
		for (Map.Entry entry : restoredFileMapping.entrySet()) {
			int id = entry.getKey();
			FileMeta.RestoredFileMeta restoredFileMeta = entry.getValue();
			FileMeta fileMeta = fileMapping.get(id);
			// a restored id must not collide with a file that is already known
			if (fileMeta != null) {
				LOG.error("file mapping should not have contained file id {}, old path: {}, new path: {}",
					id, fileMeta.getFilePath(), restoredFileMeta.filePath);
				throw new GeminiRuntimeException("file mapping should not have contained file id " + id);
			}
			Preconditions.checkState(restoredFileMeta.dbReference != 0 || restoredFileMeta.snapshotReference != 0,
				"db reference and snapshot reference can not be both 0: " +
				restoredFileMeta.id + ", " + restoredFileMeta.filePath);
			FileID fileID = new FileIDImpl(restoredFileMeta.id);
			fileMeta = new FileMeta(
				restoredFileMeta.filePath,
				fileID,
				restoredFileMeta.fileSize,
				restoredFileMeta.dataSize,
				restoredFileMeta.dbReference,
				restoredFileMeta.snapshotReference,
				restoredFileMeta.canDeleted
			);
			fileMapping.put(id, fileMeta);
			fileIDS.add(fileID);
			totalDataSize += restoredFileMeta.dataSize;
			// those file only used by snapshots should be added into waitingSnapshotDeletionFiles
			// (adding 0 reads the current DB reference without changing it)
			if (fileMeta.addAndGetDBReference(0) == 0) {
				waitingSnapshotDeletionFiles.add(fileMeta);
			}
		}
		// NOTE(review): unlike the other uses of fileIDGenerator in this class, this call is not
		// synchronized on the generator; presumably restore runs before concurrent use - confirm.
		fileIDGenerator.restoreFileIDs(fileIDS);
		fileManagerStat.addTotalDataSize(totalDataSize);
		fileManagerStat.setMaxUsedFile(fileMapping.size());
		fileManagerStat.setNumberUsedFile(fileMapping.size());
		LOG.info("restore file manager successfully: {}", workingBasePath);
	}

	/**
	 * Returns the id-to-path mapping restricted to the requested files.
	 *
	 * <p>Requested ids that are not present in the file mapping are silently left out.
	 *
	 * @param fileIDs ids of the files of interest
	 * @return a new map from file id to file path for the requested files that are known
	 */
	@Override
	public Map getFileMapping(Set fileIDs) {
		Set extractFileIDs = fileIDs.stream().map(FileID::get).collect(Collectors.toSet());
		// The filter must test the entry's key: testing the entry object itself against a set
		// of integer ids never matches and would always yield an empty result.
		return fileMapping.entrySet().stream()
			.filter(entry -> extractFileIDs.contains(entry.getKey()))
			.collect(Collectors.toMap(entry -> new FileIDImpl(entry.getKey()), entry -> entry.getValue().getFilePath()));
	}

	/**
	 * Records one more file writer error: bumps the error count and, if no error is currently
	 * recorded (timestamp is -1), remembers the current time as the start of the error run.
	 * Also counts a file writer shift in the exception statistics.
	 */
	@Override
	public void increaseFileWriterErrorCount() {
		synchronized (fileWriterErrorStatus) {
			// keep the timestamp of the first error in the run; later errors must not move it
			if (fileWriterErrorStatus.f0 == -1) {
				fileWriterErrorStatus.f0 = System.currentTimeMillis();
			}
			fileWriterErrorStatus.f1++;
		}

		exceptionStat.addTotalFileWriterShift(1);
	}

	/**
	 * Clears the file writer error status: timestamp back to -1 and error count back to 0.
	 */
	@Override
	public void resetFileWriterErrorCount() {
		synchronized (fileWriterErrorStatus) {
			fileWriterErrorStatus.f0 = -1L;
			fileWriterErrorStatus.f1 = 0;
		}
	}

	/**
	 * Tells whether new file writers may currently be created.
	 *
	 * <p>The manager is invalid while the error count has reached the fail-count threshold and less
	 * than the retry interval has passed since the recorded error timestamp. Once the interval has
	 * passed, the error status is reset and the manager is valid again.
	 *
	 * @return {@code true} if a file writer may be created
	 */
	@Override
	public boolean isValid() {
		final int consecutiveErrors;
		final long firstErrorTimestamp;
		synchronized (fileWriterErrorStatus) {
			consecutiveErrors = fileWriterErrorStatus.f1;
			firstErrorTimestamp = fileWriterErrorStatus.f0;
		}

		if (consecutiveErrors < fileManagerFailCountThreshold) {
			return true;
		}

		final long elapsed = System.currentTimeMillis() - firstErrorTimestamp;
		if (elapsed < retryCreateFileWriterInterval) {
			return false;
		}

		// the retry interval has passed: give writer creation another chance
		resetFileWriterErrorCount();
		return true;
	}

	/**
	 * Closes this file manager; only the first call has an effect.
	 *
	 * <p>The deletion-check executor is shut down. For a snapshot storage, the writers and readers
	 * of all files without a snapshot reference are closed, their paths are marked for deletion when
	 * the file may be deleted, the file mapping is cleared and a cleanup is triggered on the file
	 * cleaner. Otherwise the whole working base path is deleted recursively.
	 */
	@Override
	public void close() {
		synchronized (this) {
			if (closed) {
				LOG.warn("FileManager ({}) has been closed", workingBasePath);
				return;
			}
			closed = true;
		}

		fileDeletionCheckExecutor.shutdownNow();
		if (snapshotStorage) {
			for (FileMeta fileMeta : fileMapping.values()) {
				// adding 0 reads the current snapshot reference without changing it
				if (fileMeta.addAndGetSnapshotReference(0) == 0) {
					FileWriter fileWriter = fileMeta.getFileWriter();
					if (fileWriter != null) {
						try {
							fileWriter.close();
						} catch (Exception e) {
							LOG.error("failed to close writer when marking deletion, {}, ", fileMeta, e);
						} finally {
							fileMeta.setFileWriter(null);
						}
					}
					FileReader fileReader = fileMeta.getFileReader();
					if (fileReader != null) {
						try {
							fileReader.close();
						} catch (Exception e) {
							// the throwable is passed without a placeholder so that SLF4J logs its stack trace
							LOG.error("failed to close reader when marking deletion, {}", fileMeta, e);
						} finally {
							fileMeta.setFileReader(null);
						}
					}
					if (fileMeta.canDeleted()) {
						markedDeletionFiles.add(fileMeta.getFilePath());
					}
				}
			}
			fileMapping.clear();
			gContext.getSupervisor().getFileCleaner().triggerCleanup(this);
		} else {
			try {
				workingBasePath.getFileSystem().delete(workingBasePath, true);
				LOG.info("FileManager is not a snapshot storage, delete the whole working base path, {}", workingBasePath);
			} catch (Exception e) {
				// best effort: keep closing, but do not lose the reason of the failure
				LOG.warn("Fail to delete the working base path {}", workingBasePath, e);
			}
		}
		LOG.info("File manager ({}) is closed",  workingBasePath);
	}

	/** Returns {@code FileManager{<working base path>}}. */
	@Override
	public String toString() {
		return "FileManager{" + workingBasePath + "}";
	}

	/** Exposes the file id generator for tests. */
	@VisibleForTesting
	FileIDGenerator getFileIDGenerator() {
		return fileIDGenerator;
	}

	/** Returns an unmodifiable view of the live file mapping; later changes remain visible through it. */
	Map getFileMapping() {
		return Collections.unmodifiableMap(fileMapping);
	}

	/** Exposes the internal (mutable) set of files waiting for DB deletion; for tests only. */
	@VisibleForTesting
	SortedSet getWaitingDBDeletionFiles() {
		return waitingDBDeletionFiles;
	}

	/** Exposes the internal (mutable) set of files waiting for snapshot deletion; for tests only. */
	@VisibleForTesting
	Set getWaitingSnapshotDeletionFiles() {
		return waitingSnapshotDeletionFiles;
	}

	/**
	 * Retires a file that is no longer referenced: closes its writer and reader if still present,
	 * records its path in {@link #markedDeletionFiles} when the file may be deleted, removes it from
	 * the file mapping, recycles its id and counts the deletion.
	 *
	 * @param fileMeta the file to retire; must be the instance registered in the file mapping
	 * @throws IllegalStateException if {@code fileMeta} is not the instance in the file mapping
	 */
	private void markFileDeletion(FileMeta fileMeta) {
		FileID fileID = fileMeta.getFileId();
		FileMeta oldFileMeta = fileMapping.get(fileID.get());
		Preconditions.checkState(oldFileMeta == fileMeta, "delete a file not in mapping table");
		// recheck whether the stream has been closed
		FileWriter fileWriter = fileMeta.getFileWriter();
		if (fileWriter != null) {
			try {
				fileWriter.close();
			} catch (Exception e) {
				LOG.error("failed to close writer when marking deletion, {}, ", fileMeta, e);
			} finally {
				fileMeta.setFileWriter(null);
			}
		}
		FileReader fileReader = fileMeta.getFileReader();
		if (fileReader != null) {
			try {
				fileReader.close();
			} catch (Exception e) {
				// the throwable is passed without a placeholder so that SLF4J logs its stack trace
				LOG.error("failed to close reader when marking deletion, {}", fileMeta, e);
			} finally {
				fileMeta.setFileReader(null);
			}
		}
		// for rescale or resume in Flink, do not delete the file
		if (fileMeta.canDeleted()) {
			markedDeletionFiles.add(fileMeta.getFilePath());
			if (closed) {
				// for safety, force to delete the file here
				deleteFile(fileMeta.getFilePath(), false);
			}
		}
		fileMapping.remove(fileID.get());
		recycleFileID(fileID);
		fileManagerStat.addTotalDeletedFile(1);
	}

	/**
	 * Guards operations against use after {@link #close()}.
	 *
	 * @throws GeminiRuntimeException if this file manager has been closed
	 */
	private void checkDBStatus() {
		if (closed) {
			throw new GeminiRuntimeException("FileManager (" + workingBasePath + ") has been closed");
		}
	}

	/** Hands {@code fileID} back to the id generator while holding the generator's monitor. */
	private void recycleFileID(FileID fileID) {
		synchronized (fileIDGenerator) {
			fileIDGenerator.recycleFileID(fileID);
		}
	}

	/**
	 * Deletes a file on a best-effort basis; failures are logged and never propagated.
	 *
	 * @param fileName path of the file to delete
	 * @param recursive passed through to the file system's delete call
	 */
	private void deleteFile(String fileName, boolean recursive) {
		try {
			Path path = new Path(fileName);
			FileSystem.get(path.toUri()).delete(path, recursive);
		} catch (Exception e) {
			// the throwable is passed without a placeholder so that SLF4J logs its stack trace
			LOG.warn("Fail to delete file {}", fileName, e);
		}
	}

	/**
	 * Refreshes the gauge-like statistics and logs them, at most once every 60 seconds and only
	 * when debug logging is enabled.
	 */
	private void printStat() {
		if (!LOG.isDebugEnabled()) {
			return;
		}
		final long now = System.currentTimeMillis();
		if (lastPrintStatTime + 60000 >= now) {
			// printed less than a minute ago
			return;
		}
		lastPrintStatTime = now;
		fileManagerStat.setNumberUsedFile(fileMapping.size());
		fileManagerStat.setNumberWaitingDBDeletionFile(waitingDBDeletionFiles.size());
		fileManagerStat.setNumberWaitingSnapshotDeletionFile(waitingSnapshotDeletionFiles.size());
		fileManagerStat.setNumberMarkDeletionFile(markedDeletionFiles.size());
		LOG.info("FileManagerStat {}, {}", workingBasePath, fileManagerStat);
	}

	/**
	 * Returns the smaller of the current DB access number and the minimum snapshot access number.
	 * Files whose discard access number is below this value are no longer needed by the DB or by
	 * running snapshots.
	 */
	private long getCurrentAccessNumber() {
		final long dbAccessNumber = gContext.getAccessNumber();
		final long minSnapshotAccessNumber = gContext.getMinSnapshotAccessNumber();
		return Math.min(dbAccessNumber, minSnapshotAccessNumber);
	}

	/**
	 * A {@link Runnable} which periodically check whether the files in {@link FileManagerImpl#waitingDBDeletionFiles}
	 * are still used by DB according to {@link FileMeta#discardAccessNumber} and {@link FileMeta#discardTimeStamp}.
	 * If the file are not used by DB, it will be moved to {@link FileManagerImpl#waitingSnapshotDeletionFiles}.
	 */
	private class DBDeletionCheckRunner implements Runnable {
		@Override
		public void run() {
			long currentTime = System.currentTimeMillis();
			long deleteTime = currentTime - fileAliveTimeAfterNoDBReference;
			while (!closed && !waitingDBDeletionFiles.isEmpty()) {
				FileMeta fileMeta = waitingDBDeletionFiles.first();
				long currentAccessNumber = getCurrentAccessNumber();
				// guarantee the file is not used by DB and running snapshots
				if (fileMeta.getDiscardAccessNumber() >= currentAccessNumber ||
					fileMeta.getDiscardTimeStamp() >= deleteTime) {
					break;
				}
				waitingDBDeletionFiles.remove(fileMeta);
				// now we can close the reader
				try {
					FileReader fileReader = fileMeta.getFileReader();
					if (fileReader != null) {
						fileReader.close();
						fileMeta.setFileReader(null);
					}
				} catch (Exception e) {
					LOG.error("failed to close file reader when moving from db deletion set to snapshot set, {}, {}", fileMeta, e);
				}
				// add the file meta to the waitingSnapshotDeletionFiles before check the snapshot reference,
				// otherwise there may be concurrency problem as follows:
				// 1. check snapshot reference is 1, and decide to add the file meta to the waitingSnapshotDeletionFiles,
				//    but not yet complete
				// 2. decSnapshotReference leads to snapshot reference to 0, and try to remove the file meta from
				//    waitingSnapshotDeletionFiles, but step 1 has not completed, so remove will fail
				// 3. add file meta to waitingSnapshotDeletionFiles successfully
				// if we do nothing after 3, the file meta will never be deleted. So we need to add the file meta first, and
				// then check the snapshot reference.
				waitingSnapshotDeletionFiles.add(fileMeta);
				LOG.debug("Add file to waitingSnapshotDeletionFiles {}, current time {}, current access number {}",
					fileMeta, currentTime, currentAccessNumber);
				if (fileMeta.addAndGetSnapshotReference(0) == 0) {
					if (waitingSnapshotDeletionFiles.remove(fileMeta)) {
						// mark file deletion only when remove is successful to avoid
						// concurrent deletion in decSnapshotReference
						markFileDeletion(fileMeta);
						LOG.debug("Mark file deletion by DB {}", fileMeta);
					}
				}
			}
			printStat();
		}
	}

	/** Returns the page utility this file manager was created with. */
	@Override
	public DataPageUtil getDataPageUtil() {
		return dataPageUtil;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy