/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.snapshot;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.state.gemini.engine.GRegionID;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.filecache.PageBatchFlusher;
import org.apache.flink.runtime.state.gemini.engine.fs.FileManager;
import org.apache.flink.runtime.state.gemini.engine.fs.FileMeta;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddress;
import org.apache.flink.util.Preconditions;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Implementation of {@link SnapshotCompaction}. Tracks, per DFS file, how much
 * of the file is live data shared with the pending snapshot, computes the
 * snapshot's space amplification ratio, and, when that ratio exceeds the
 * configured target, rewrites the pages of the worst-amplified files into
 * fresh files.
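 *
 * <p>A typical lifecycle, as a sketch (how {@link SnapshotManager} actually
 * drives these calls is an assumption here, inferred from this class alone):
 * <pre>{@code
 * SnapshotCompaction compaction = new SnapshotCompactionImpl(gContext, pendingSnapshot);
 * // while the snapshot scans its page indexes:
 * compaction.recordSharedPage(pageAddress);
 * // after all shared pages have been recorded:
 * compaction.run();
 * }</pre>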
 */
public class SnapshotCompactionImpl implements SnapshotCompaction {

	private static final Logger LOG = LoggerFactory.getLogger(SnapshotCompactionImpl.class);

	/**
	 * DB context.
	 */
	private final GContext gContext;

	/**
	 * DFS file manager used for snapshot.
	 */
	private final FileManager dfsFileManager;

	/**
	 * Pending snapshot this compaction belongs to.
	 */
	private final SnapshotManager.PendingSnapshot pendingSnapshot;

	/**
	 * Target amplification ratio to compact to.
	 */
	private final float targetRatio;

	/**
	 * Mapping from file id to a tuple containing the number of pages and the
	 * shared data size the snapshot has in that file.
	 */
	private final Map<Integer, Tuple2<AtomicInteger, AtomicLong>> sharedFiles;

	/**
	 * Counter for the number of running flush tasks triggered by compaction.
	 */
	private final AtomicInteger compactionTaskCounter;

	public SnapshotCompactionImpl(GContext gContext, SnapshotManager.PendingSnapshot pendingSnapshot) {
		this.gContext = gContext;
		this.dfsFileManager = gContext.getSupervisor().getDfsFileManager();
		this.pendingSnapshot = pendingSnapshot;
		this.targetRatio = gContext.getGConfiguration().getSnapshotCompactionTargetRatio();
		Preconditions.checkArgument(targetRatio >= 1.0f,
			"Snapshot compaction target ratio can't be less than 1");
		this.sharedFiles = new ConcurrentHashMap<>();
		this.compactionTaskCounter = new AtomicInteger(0);
	}

	@Override
	public void recordSharedPage(PageAddress page) {
		// the snapshot synchronizes with the file manager via the access number
		// before it starts, so files used by the snapshot won't be deleted and
		// the file id must exist
		int fileId = dfsFileManager.getSimpleFileID(page.getDfsAddress());
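		// per-file accounting: f0 counts this snapshot's pages in the file,
		// f1 sums the bytes of live data those pages occupy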
		Tuple2<AtomicInteger, AtomicLong> tuple = sharedFiles.computeIfAbsent(fileId,
			(para) -> Tuple2.of(new AtomicInteger(0), new AtomicLong(0)));
		tuple.f0.addAndGet(1);
		tuple.f1.addAndGet(page.getDataLen());
	}

	// SnapshotStage interface implementation ===============================================

	@Override
	public boolean isAsync() {
		return true;
	}

	@Override
	public void run() {
		SnapshotCheckResult checkResult = checkSnapshot();
		updateSnapshotStat(checkResult);
		if (checkResult.isNeedCompaction) {
			transferPages(checkResult);
		}
	}

	@VisibleForTesting
	Map<Integer, Tuple2<AtomicInteger, AtomicLong>> getSharedFiles() {
		return sharedFiles;
	}

	@VisibleForTesting
	SnapshotCheckResult checkSnapshot() {
		SnapshotStat snapshotStat = pendingSnapshot.getSnapshotStat();
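		// addAndGet(0) is used as a plain read of the current counter values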
		long totalDataSize = snapshotStat.addAndGetTotalSize(0);
		long totalFileSize = totalDataSize;
		long incrementalDataSize = snapshotStat.addAndGetIncrementalSize(0);

		// set of files still open for write
		Set<Integer> fileOpenedForWrite = new HashSet<>();
		List<SnapshotFileInfo> fileInfoList = new ArrayList<>();
		for (Map.Entry<Integer, Tuple2<AtomicInteger, AtomicLong>> entry : sharedFiles.entrySet()) {
			int fileId = entry.getKey();
			int numPage = entry.getValue().f0.get();
			long sharedDataSize = entry.getValue().f1.get();
			// the snapshot synchronizes with the file manager via the access number
			// before it starts, so files used by the snapshot won't be deleted and
			// the file meta must exist
			FileMeta fileMeta = dfsFileManager.getFileMeta(fileId);
			long fileSize = fileMeta.getFileSize();
			SnapshotFileInfo fileInfo = new SnapshotFileInfo(fileId, fileSize, sharedDataSize,
				numPage, (float) fileSize / sharedDataSize);
			fileInfoList.add(fileInfo);
			// add size of useless data in file
			totalFileSize += fileSize - sharedDataSize;
			// record files still open for write; only files already closed for
			// write are considered for compaction below
			if (fileMeta.getFileWriter() != null) {
				fileOpenedForWrite.add(fileId);
			}
		}

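		// amplification ratio = total file size / live data size; 1.0 means no
		// wasted space. E.g. 100 bytes of files holding 40 bytes of live data
		// gives a ratio of 2.5.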
		float currentRatio = totalDataSize == 0 ? 1.0f : (float) totalFileSize / totalDataSize;
		// sort files by ratio in descending order
		fileInfoList.sort((f1, f2) -> Float.compare(f2.ratio, f1.ratio));

		if (LOG.isDebugEnabled()) {
			LOG.debug("current snapshot {} statistics: number of shared files {}, total file size {},"
				+ " total data size {}, amplification ratio {}", pendingSnapshot.getCheckpointId(),
				fileInfoList.size(), totalFileSize, totalDataSize, currentRatio);
			LOG.debug("current snapshot {} shared file details: {}", pendingSnapshot.getCheckpointId(), fileInfoList);
		}

		if (currentRatio <= targetRatio) {
			return SnapshotCheckResult.of(totalFileSize, totalDataSize, currentRatio, false,
				totalFileSize, incrementalDataSize, currentRatio, Collections.emptyMap());
		}

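		// Greedily take files in descending ratio order (most garbage per byte of
		// live data) until the expected ratio reaches the target.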
		// files selected for compaction, keyed by file id
		Map<Integer, SnapshotFileInfo> compactionFiles = new HashMap<>();
		long expectedTotalFileSize = totalFileSize;
		long expectedIncrementalDataSize = incrementalDataSize;
		float expectedTotalRatio = currentRatio;
		int i = 0;
		while (i < fileInfoList.size() && expectedTotalRatio > targetRatio) {
			SnapshotFileInfo info = fileInfoList.get(i);
			// only consider those files that have been closed for write
			if (!fileOpenedForWrite.contains(info.fileId)) {
				compactionFiles.put(info.fileId, info);
				// subtract the file's garbage bytes (file size minus live data)
				// from the expected total file size
				expectedTotalFileSize -= (info.fileSize - info.dataSize);
				expectedIncrementalDataSize -= info.dataSize;
				expectedTotalRatio = (float) expectedTotalFileSize / totalDataSize;
			}
			i++;
		}

		if (LOG.isDebugEnabled()) {
			LOG.debug("expected snapshot {} statistics: compact {} files, total file size {},"
				+ " total data size {}, amplification ratio, {}", pendingSnapshot.getCheckpointId(),
				i, expectedTotalFileSize, totalDataSize, expectedTotalRatio);
			LOG.debug("expected snapshot {} files to compact: {}", pendingSnapshot.getCheckpointId(), compactionFiles.values());
		}

		return SnapshotCheckResult.of(totalFileSize, totalDataSize, currentRatio, true,
			expectedTotalFileSize, expectedIncrementalDataSize, expectedTotalRatio, compactionFiles);
	}

	private void updateSnapshotStat(SnapshotCheckResult snapshotCheckResult) {
		SnapshotStat snapshotStat = pendingSnapshot.getSnapshotStat();
		snapshotStat.setTotalFileSizeBeforeCompaction(snapshotCheckResult.totalFileSize);
		snapshotStat.setIncrementalSizeBeforeCompaction(snapshotStat.addAndGetIncrementalSize(0));
		snapshotStat.setAmplificationRatioBeforeCompaction(snapshotCheckResult.amplificationRatio);
		snapshotStat.setNeedCompaction(snapshotCheckResult.isNeedCompaction);
	}

	private void transferPages(SnapshotCheckResult checkResult) {
		SnapshotCompletableFuture completableFuture = pendingSnapshot.getResultFuture();
		Map<Integer, SnapshotFileInfo> compactionFiles = checkResult.compactionFiles;
		Map<String, Map<GRegionID, SnapshotManager.GRegionSnapshotMeta>> regionSnapshotMetas =
			pendingSnapshot.getGRegionSnapshotMeta();
		SnapshotStat snapshotStat = pendingSnapshot.getSnapshotStat();

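		// take a guard task so the counter stays above zero until every page has
		// been queued; released by the matching removeTask() at the end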
		addTask();
		snapshotStat.setCompactionStartTime(System.currentTimeMillis());
		PageBatchFlusher pageBatchFlusher = pendingSnapshot.getDfsPageBatchFlusher();
		// if the snapshot's flusher does not force flushes, create a force-flush
		// copy of it for compaction
		if (!pageBatchFlusher.isForceFlush()) {
			pageBatchFlusher = new PageBatchFlusher(
				pageBatchFlusher.getBatchNumPages(),
				pageBatchFlusher.getBatchDataSize(),
				true,
				pageBatchFlusher.getFileCache(),
				pageBatchFlusher.getEventExecutorGroup());
		}
		// TODO reusing dfsPageBatchFlusher here is currently thread safe because
		// snapshot compaction runs in the snapshot executor; revisit if that
		// assumption changes
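		// Walk every page index in the snapshot and re-queue each page that lives
		// in a file selected for compaction; the flusher rewrites the page to a
		// fresh file, so the old, mostly-garbage file drops out of this snapshot.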
		for (Map<GRegionID, SnapshotManager.GRegionSnapshotMeta> region : regionSnapshotMetas.values()) {
			for (SnapshotManager.GRegionSnapshotMeta meta : region.values()) {
				Iterator<PageAddress> iterator = meta.getPageIndex().pageIterator();
				while (iterator.hasNext()) {
					PageAddress page = iterator.next();
					int fileId = dfsFileManager.getSimpleFileID(page.getDfsAddress());
					if (compactionFiles.containsKey(fileId)) {
						completableFuture.incRunningTask();
						addTask();
						snapshotStat.addAndGetIncrementalSize(page.getDataLen());
						snapshotStat.addAndGetIncrementalPages(1);
						pageBatchFlusher.addPage(page, meta.getGRegionContext(),
							(success, throwable) -> {
								if (!success) {
									LOG.error("Write error when snapshot dfs", throwable);
									completableFuture.setEndSnapshot();
									completableFuture.completeExceptionally(throwable);
								}
								removeTask();
								completableFuture.decRunningTask();
							});
					}
				}
			}
		}
		pageBatchFlusher.flush();

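		// release the guard task taken at the top of this method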
		removeTask();
	}

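	// addTask/removeTask form a simple latch around the compaction flush tasks:
	// the counter can only drop to zero after transferPages() has released its
	// guard and every page callback has run, which marks the compaction end time.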
	private void addTask() {
		compactionTaskCounter.addAndGet(1);
	}

	private void removeTask() {
		if (compactionTaskCounter.addAndGet(-1) == 0) {
			// compaction finished
			pendingSnapshot.getSnapshotStat().setCompactionEndTime(System.currentTimeMillis());
		}
	}

	/**
	 * Information about a file used by the snapshot.
	 */
	static class SnapshotFileInfo {
		int fileId;
		long fileSize;
		long dataSize;
		int numPage;
		float ratio;

		SnapshotFileInfo(int fileId, long fileSize, long dataSize, int numPage, float ratio) {
			this.fileId = fileId;
			this.fileSize = fileSize;
			this.dataSize = dataSize;
			this.numPage = numPage;
			this.ratio = ratio;
		}

		@Override
		public String toString() {
			return "SnapshotFileInfo={" +
				"fileId=" + fileId +
				", fileSize=" + fileSize +
				", dataSize=" + dataSize +
				", numPage=" + numPage +
				", ratio=" + ratio +
				"}";
		}
	}

	static class SnapshotCheckResult {
		/**
		 * Total file size before compaction.
		 */
		long totalFileSize;

		/**
		 * Data size of snapshot.
		 */
		long totalDataSize;

		/**
		 * Amplification ratio before compaction.
		 */
		float amplificationRatio;

		/**
		 * Whether compaction is needed.
		 */
		boolean isNeedCompaction;

		/**
		 * Files which need to be compacted, keyed by file id.
		 */
		Map<Integer, SnapshotFileInfo> compactionFiles;

		/**
		 * Expected total file size after compaction.
		 */
		long expectedTotalFileSize;

		/**
		 * Expected incremental size of snapshot after compaction.
		 */
		long expectedIncrementalDataSize;

		/**
		 * Expected amplification ratio after compaction.
		 */
		float expectedAmplificationRatio;

		static SnapshotCheckResult of(
			long totalSnapshotFileSize,
			long totalDataSize,
			float amplificationRatio,
			boolean isNeedCompaction) {
			return of(totalSnapshotFileSize, totalDataSize, amplificationRatio, isNeedCompaction,
				totalSnapshotFileSize, 0L, amplificationRatio, Collections.emptyMap());
		}

		static SnapshotCheckResult of(
			long totalFileSize,
			long totalDataSize,
			float amplificationRatio,
			boolean isNeedCompaction,
			long expectedTotalFileSize,
			long expectedIncrementalDataSize,
			float expectedAmplificationRatio,
			Map<Integer, SnapshotFileInfo> compactionFiles) {
			SnapshotCheckResult result = new SnapshotCheckResult();
			result.totalFileSize = totalFileSize;
			result.totalDataSize = totalDataSize;
			result.amplificationRatio = amplificationRatio;
			result.isNeedCompaction = isNeedCompaction;
			result.expectedTotalFileSize = expectedTotalFileSize;
			result.expectedIncrementalDataSize = expectedIncrementalDataSize;
			result.expectedAmplificationRatio = expectedAmplificationRatio;
			result.compactionFiles = compactionFiles;

			return result;
		}
	}
}