/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.filecache;

import org.apache.flink.runtime.state.gemini.engine.GConfiguration;
import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.fs.FileManager;
import org.apache.flink.runtime.state.gemini.engine.fs.FileWriter;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddress;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddressSingleImpl;
import org.apache.flink.runtime.state.gemini.engine.rm.GByteBuffer;
import org.apache.flink.util.Preconditions;

import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutor;

import javax.annotation.Nullable;

import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import java.util.function.BiConsumer;

/**
 * A file-based cache. Generally, a page stores its data in the cache first,
 * and it depends on the implementation when to flush the page to the destination
 * storage. {@link #flushPage} can be used to flush a page to the destination
 * storage directly. Whether the methods block on IO depends on the implementation.
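 *
 * <p>A minimal usage sketch (the page address, region context, and executor are
 * assumed to come from the surrounding engine code, and the callback's type
 * parameters are reconstructed here as a success flag plus a failure cause):
 * <pre>{@code
 * FileCache cache = FileCache.createFileCache(gContext, localFileManager, dfsFileManager);
 * cache.addPage(pageAddress, regionContext, eventExecutor,
 *     (success, failure) -> {
 *         if (!success) {
 *             // handle the failure, e.g. log it together with the cause
 *         }
 *     });
 * }</pre>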
 */
public abstract class FileCache implements Closeable {

	/**
	 * Type of file cache, classified by capacity.
	 */
	public enum FileCacheType {

		/**
		 * There is no capacity.
		 */
		NONE,

		/**
		 * There is infinite capacity.
		 */
		INFINITE,

		/**
		 * There is limited capacity.
		 */
		LIMITED
	}

	/**
	 * Capacity of the cache.
	 */
	protected final long capacity;

	protected final FileCacheStat fileCacheStat;

	public FileCache(long capacity, FileCacheStat fileCacheStat) {
		this.capacity = capacity;
		this.fileCacheStat = Preconditions.checkNotNull(fileCacheStat);
	}

	/**
	 * Returns the capacity of the cache.
	 */
	public long capacity() {
		return capacity;
	}

	/**
	 * Returns whether the page is cached.
	 */
	public abstract boolean isCached(PageAddress pageAddress);

	/**
	 * Adds the page to the cache. It must be guaranteed that the page is in
	 * memory or on DFS. Whether this method is sync or async depends on the
	 * implementation; it can use {@code eventExecutor} to run asynchronously.
	 * The {@code callBack} is always run at the end, with a boolean argument
	 * indicating whether the page has been successfully added.
	 *
	 * @param pageAddress address of the page to add.
	 * @param gRegionContext context of the region this page belongs to.
	 * @param eventExecutor executor used to run asynchronously.
	 * @param callBack code to execute after the page is added to the cache.
	 */
	public abstract void addPage(
		PageAddress pageAddress,
		GRegionContext gRegionContext,
		EventExecutor eventExecutor,
		@Nullable BiConsumer<Boolean, Throwable> callBack);

	/**
	 * Gets the page.
	 */
	public abstract GByteBuffer getPage(
		PageAddress pageAddress,
		GRegionContext gRegionContext,
		EventExecutor eventExecutor);

	/**
	 * Discards the page from the cache and the destination storage.
	 * Whether this method is sync or async depends on the implementation;
	 * it can use {@code eventExecutor} to run asynchronously.
	 *
	 * @param pageAddress address of the page to discard.
	 * @param gRegionContext context of the region this page belongs to.
	 * @param eventExecutor executor used to run asynchronously.
	 */
	public abstract void discardPage(
		PageAddress pageAddress,
		GRegionContext gRegionContext,
		EventExecutor eventExecutor);

	/**
	 * Flushes the page to the destination storage without caching it.
	 * Whether this method is sync or async depends on the implementation;
	 * it can use {@code eventExecutor} to run asynchronously. If {@code force}
	 * is true, the page is always flushed, no matter whether it has already
	 * been flushed, and the old address is replaced. The {@code callBack} is
	 * always run at the end, with a boolean argument indicating whether the
	 * page has been successfully flushed.
	 *
	 * @param pageAddress address of the page to flush.
	 * @param gRegionContext context of the region this page belongs to.
	 * @param eventExecutor executor used to run asynchronously.
	 * @param force whether to force the flush.
	 * @param callBack code to execute after the page is flushed.
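	 *
	 * <p>For example (a sketch; the callback is assumed to receive a success
	 * flag plus a failure cause):
	 * <pre>{@code
	 * fileCache.flushPage(pageAddress, regionContext, eventExecutor, true,
	 *     (success, failure) -> {
	 *         // runs after the forced flush, whether it succeeded or failed
	 *     });
	 * }</pre>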
	 */
	public abstract void flushPage(
		PageAddress pageAddress,
		GRegionContext gRegionContext,
		EventExecutor eventExecutor,
		boolean force,
		@Nullable BiConsumer<Boolean, Throwable> callBack);

	/**
	 * Adds a batch of pages to the cache. It is guaranteed that the pages are
	 * either all added successfully or all fail to be added; it will never be
	 * the case that only some of the pages are added successfully. The semantic
	 * of success is that once a page address is updated, the data can be read
	 * immediately. Whether this method is sync or async depends on the
	 * implementation; it can use {@code eventExecutor} to run asynchronously.
	 * The {@code callBacks} are always run at the end, with a boolean argument
	 * indicating whether the pages have been successfully added.
	 *
	 * @param pages list of pages to add.
	 * @param gRegionContexts list of region contexts.
	 * @param eventExecutor executor used to run asynchronously.
	 * @param callBacks list of callbacks to execute after the add.
	 */
	public abstract void addBatchPages(
		List<PageAddress> pages,
		List<GRegionContext> gRegionContexts,
		EventExecutor eventExecutor,
		List<BiConsumer<Boolean, Throwable>> callBacks);

	/**
	 * Flushes a batch of pages to the destination storage without caching them.
	 * It is guaranteed that the pages are either all flushed successfully or
	 * all fail to be flushed; it will never be the case that only some of the
	 * pages are flushed successfully. The semantic of success is that once a
	 * page address is updated, the data can be read immediately. Whether this
	 * method is sync or async depends on the implementation; it can use
	 * {@code eventExecutor} to run asynchronously. The {@code callBacks} are
	 * always run, with a boolean argument indicating whether the pages have
	 * been successfully flushed.
	 *
	 * @param pages list of pages to flush.
	 * @param gRegionContexts list of region contexts.
	 * @param eventExecutor executor used to run asynchronously.
	 * @param force whether to force the flush if the page already has an address.
	 * @param flushLocal whether to ensure the local copy is valid before updating the DFS address.
	 * @param callBacks code to execute after the flush, no matter whether it succeeds or fails.
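	 *
	 * <p>A sketch of an all-or-nothing batch flush (the lists are assumed to be
	 * built by the caller and to have matching sizes):
	 * <pre>{@code
	 * fileCache.flushBatchPages(pages, regionContexts, eventExecutor,
	 *     false,  // do not force a re-flush of pages that already have an address
	 *     true,   // ensure the local copy is valid before updating the DFS address
	 *     callBacks);
	 * }</pre>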
	 */
	public abstract void flushBatchPages(
		List<PageAddress> pages,
		List<GRegionContext> gRegionContexts,
		EventExecutor eventExecutor,
		boolean force,
		boolean flushLocal,
		List<BiConsumer<Boolean, Throwable>> callBacks);

	/**
	 * TODO this is a hack method to sync snapshot data. We will improve it later.
	 */
	public abstract void sync() throws IOException;

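	/**
	 * Returns the statistics of this file cache.
	 */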
	public FileCacheStat getFileCacheStat() {
		return fileCacheStat;
	}

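	/**
	 * Returns the type of this file cache.
	 */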
	public abstract FileCacheType getFileCacheType();

	/**
	 * Writes the page to a file with the given file writer, updates the address
	 * of the page, and updates the references of files in the given file manager.
	 *
	 * @param fileManager file manager backing the file to write.
	 * @param fileWriter writer for the file.
	 * @param pageAddress the page.
	 * @param gByteBuffer buffer that contains the data.
	 * @param gRegionContext region context.
	 * @param isLocal whether the address to update is local.
	 * @param flushForce whether to flush after the write.
	 */
	void internalAddPage(
		FileManager fileManager,
		FileWriter fileWriter,
		PageAddress pageAddress,
		GByteBuffer gByteBuffer,
		GRegionContext gRegionContext,
		boolean isLocal,
		boolean flushForce) throws Exception {
		Preconditions.checkArgument(pageAddress instanceof PageAddressSingleImpl);
		long address = writePage(fileManager, fileWriter, pageAddress, gByteBuffer, gRegionContext, isLocal);

		if (flushForce) {
			fileWriter.flush();
		}

		updatePageAddress(fileManager, pageAddress, address, isLocal,
			gRegionContext.getGContext().getAccessNumber());
	}

	/**
	 * Writes the page to a file with the given file writer, and returns the
	 * address of the page if successful.
	 */
	long writePage(
		FileManager fileManager,
		FileWriter fileWriter,
		PageAddress page,
		GByteBuffer buffer,
		GRegionContext gRegionContext,
		boolean isLocal) throws IOException {
		long address;
		int numRetries = 0;
		long startTime = System.nanoTime();
		int diskDataLen;
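		// Retry the write on transient failures; rethrow after the third failed attempt.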
		while (true) {
			try {
				address = fileWriter.getAddress();
				diskDataLen = fileManager.getDataPageUtil().write(
					fileWriter,
					buffer,
					page,
					gRegionContext.getGContext().getFlushWholePageGCompressAlgorithm(isLocal),
					gRegionContext.getGContext().getWholePageCompressThreshold(),
					gRegionContext.getGContext().getGConfiguration().isChecksumEnable());
				break;
			} catch (Exception e) {
				//TODO: #SR add a filter or something else for exceptions.
				fileWriter.increasFailCount();
				++numRetries;
				if (numRetries >= 3) {
					throw e;
				}
			}
		}

		fileWriter.resetFailCount();
		updateWriteStat(diskDataLen, page.getDataLen(), System.nanoTime() - startTime, isLocal);

		return address;
	}

	/**
	 * Replaces the address of the page with the new one, and updates the
	 * references of the files backing the old and new addresses in the given
	 * file manager. Note that the new address does not yet reference its
	 * backing file when this function is called.
	 *
	 * @param fileManager file manager backing the old and new addresses.
	 * @param page page to update references for.
	 * @param newAddress new address.
	 * @param isLocal whether the address to update is local.
	 * @param accessNumber access number at the time the address is updated.
	 */
	void updatePageAddress(
		FileManager fileManager,
		PageAddress page,
		long newAddress,
		boolean isLocal,
		long accessNumber) {
		// add the reference for the file used by the new address before we go on;
		// this fixes [BLINK-20749612]
		fileManager.incDBReference(newAddress, page.getDataLen());

		boolean pageValid;
		boolean hasOldAddress = false;
		long oldAddress = 0;
		synchronized (page) {
			pageValid = page.isPageValid();
			if (pageValid) {
				hasOldAddress = isLocal ? page.isLocalValid() : page.isDfsValid();
				oldAddress = isLocal ? page.getLocalAddress() : page.getDfsAddress();
			}
			// we always set the address no matter whether the page is discarded
			if (isLocal) {
				page.setLocalAddress(newAddress);
				page.setLocalStatus(true);
			} else {
				page.setDfsAddress(newAddress);
				page.setDfsStatus(true);
			}
		}

		if (pageValid) {
			// if old address exists, we need to dereference the file it used
			if (hasOldAddress) {
				// decrementing the reference may lead to file deletion, but the file may
				// still be in use somewhere:
				// 1. read by the main thread
				//    In this case, we use the access number to delay the deletion. Because
				//    this page is not discarded, it's important to set the new address
				//    before getting the access number, so that the old address will not
				//    be used by the next access.
				// 2. read by split or compaction
				//    In this case, we will retry in getPage so that we can read the data
				//    again using the new address.
				fileManager.decDBReference(
					oldAddress,
					accessNumber,
					System.currentTimeMillis(),
					page.getDataLen());
			}
		} else {
			// if the page is not valid, we should dereference the file used by the new address.
			fileManager.decDBReference(
				newAddress,
				accessNumber,
				System.currentTimeMillis(),
				page.getDataLen());
		}
	}

	private void updateWriteStat(long diskLen, long size, long time, boolean isLocal) {
		if (isLocal) {
			fileCacheStat.addLocalWrite(diskLen, size, time);
		} else {
			fileCacheStat.addDFSWrite(diskLen, size, time);
		}
	}

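	/**
	 * Creates a {@link FileCache} according to the file cache type in the
	 * configuration: {@code NONE} yields a {@link NoFileCache}, {@code INFINITE}
	 * yields an {@link InfiniteFileCache}, and {@code LIMITED} is currently
	 * not supported.
	 *
	 * @param context the database context providing the configuration.
	 * @param localFileManager file manager for local storage.
	 * @param dfsFileManager file manager for DFS storage.
	 * @return the created file cache.
	 */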
	public static FileCache createFileCache(
		GContext context,
		FileManager localFileManager,
		FileManager dfsFileManager) {
		GConfiguration gConfiguration = context.getGConfiguration();
		FileCacheType type = gConfiguration.getFileCacheType();
		FileCache fileCache;
		switch (type) {
			case NONE:
				fileCache = new NoFileCache(context, dfsFileManager);
				break;
			case INFINITE:
				fileCache = new InfiniteFileCache(context, localFileManager, dfsFileManager);
				break;
			case LIMITED:
				throw new GeminiRuntimeException("Limited file cache is not supported currently");
			default:
				throw new GeminiRuntimeException("Unknown file cache type " + type);
		}

		return fileCache;
	}
}