
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.filecache;

import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.fs.FileManager;
import org.apache.flink.runtime.state.gemini.engine.fs.FileReader;
import org.apache.flink.runtime.state.gemini.engine.fs.FileWriter;
import org.apache.flink.runtime.state.gemini.engine.metrics.FileCacheMetrics;
import org.apache.flink.runtime.state.gemini.engine.page.DataPage;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddress;
import org.apache.flink.runtime.state.gemini.engine.page.compress.GCompressAlgorithm;
import org.apache.flink.runtime.state.gemini.engine.rm.ReferenceCount.ReleaseType;
import org.apache.flink.util.Preconditions;

import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutor;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiConsumer;

/**
 * An implementation of {@link FileCache} with no capacity, so pages are written
 * directly to the destination (DFS) storage instead of being cached locally.
 * NOTICE: TODO references are not supported yet.
 */
public class NoCapacityFileCache extends FileCache {

	private static final Logger LOG = LoggerFactory.getLogger(NoCapacityFileCache.class);

	private final GContext gContext;

	private final long maxFileSize;

	/**
	 * File manager for the destination (DFS) storage.
	 */
	private final FileManager dfsFileManager;

	private volatile boolean closed;

	/**
	 * Each event executor has its own file writer.
	 */
	private final Map<EventExecutor, FileWriter> destFileWriters;

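	/**
	 * Creates a write-through file cache that flushes pages to the given
	 * destination file manager.
	 *
	 * @param gContext       the database context
	 * @param dfsFileManager the file manager for the destination (DFS) storage
	 */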
	public NoCapacityFileCache(
		GContext gContext,
		FileManager dfsFileManager
	) {
		super(0, new FileCacheStat());

		this.gContext = Preconditions.checkNotNull(gContext);
		this.maxFileSize = gContext.getGConfiguration().getMaxLogStructureFileSize();

		this.dfsFileManager = Preconditions.checkNotNull(dfsFileManager);
		this.destFileWriters = new ConcurrentHashMap<>();

		FileCacheMetrics fileCacheMetrics = gContext.getFileCacheMetrics();
		if (fileCacheMetrics != null) {
			fileCacheMetrics.register(fileCacheStat);
		}

		this.closed = false;

		LOG.info("NoCapacityFileCache created, DestFileManager {}", dfsFileManager);
	}

	@Override
	public boolean isCached(PageAddress pageAddress) {
		return pageAddress.isDfsValid();
	}

	@Override
	public void addPage(
		PageAddress pageAddress,
		GRegionContext gRegionContext,
		EventExecutor eventExecutor,
		BiConsumer<Boolean, Throwable> callBack
	) {
		flushPage(pageAddress, gRegionContext, eventExecutor, false, callBack);
	}

	@Override
	public DataPage getPage(
		PageAddress pageAddress,
		GRegionContext gRegionContext,
		EventExecutor eventExecutor) {
		Preconditions.checkState(pageAddress.isDfsValid(), "dfs address should be valid");
		try {
			DataPage dataPage = getDataPage(dfsFileManager, gRegionContext,
					pageAddress::getDfsAddress, pageAddress);
			Preconditions.checkNotNull(dataPage, "no page exists on dfs");
			return dataPage;
		} catch (Exception e) {
			LOG.error("exception when getting page", e);
			throw new GeminiRuntimeException("exception when get page", e);
		}
	}

	@Override
	public void discardPage(
		PageAddress pageAddress,
		GRegionContext gRegionContext,
		EventExecutor eventExecutor) {
		boolean pageValid;
		boolean dfsValid = false;
		long dfsAddress = -1;
		synchronized (pageAddress) {
			pageValid = pageAddress.isPageValid();
			if (pageValid) {
				pageAddress.setPageStatus(false);
				dfsValid = pageAddress.isDfsValid();
				if (dfsValid) {
					dfsAddress = pageAddress.getDfsAddress();
				}
			}
		}
		if (pageValid && dfsValid) {
			long accessNumber = gContext.getAccessNumber();
			long ts = System.currentTimeMillis();
			// Decrementing the reference may lead to deleting the file, but the file may still be in use:
			// 1. read by the main thread
			//    In this case, we use the access number to delay the deletion.
			// 2. read by split or compaction
			//    Generally discardPage is called in the region thread, which ensures the page
			//    will not be used by split or compaction after it is discarded.
			dfsFileManager.decDBReference(
				dfsAddress,
				accessNumber,
				ts,
				pageAddress.getDataLen());
		}
	}

	@Override
	public void flushPage(
		PageAddress pageAddress,
		GRegionContext gRegionContext,
		EventExecutor eventExecutor,
		boolean force,
		BiConsumer<Boolean, Throwable> callBack) {
		if (!force && pageAddress.isDfsValid()) {
			if (callBack != null) {
				callBack.accept(true, null);
			}
			return;
		}

		// add a reference before the flush task runs in the event executor.
		final DataPage oriDataPage = pageAddress.getDataPage();
		eventExecutor.submit(() -> {
			boolean success = false;
			Throwable throwable = null;
			boolean pageIsNull = false;
			DataPage dataPage = oriDataPage;
			try {
				// recheck whether to flush
				if (force || !pageAddress.isDfsValid()) {
					if (dataPage == null) {
						pageIsNull = true;
						if (pageAddress.isDfsValid()) {
							// this may happen during data migration or the first snapshot after rescale
							dataPage = getDataPage(dfsFileManager, gRegionContext,
								pageAddress::getDfsAddress, pageAddress);
						}
					}
					Preconditions.checkNotNull(dataPage, "Data page is null");
					FileWriter fileWriter = getOrCreateFileWriter(destFileWriters, dfsFileManager, eventExecutor);
					internalAddPage(dfsFileManager, fileWriter, pageAddress, dataPage, gRegionContext, false, true);
				}
				success = true;
			} catch (Exception e) {
				success = false;
				throwable = e;
				LOG.error("error when adding page to cache: pageIsNull={}, {}", pageIsNull, e.getMessage(), e);
			} finally {
				if (dataPage != null) {
					dataPage.delReferenceCount(ReleaseType.Normal);
				}
				if (callBack != null) {
					callBack.accept(success, throwable);
				}
			}
		});
	}

	@Override
	public void sync(EventExecutor eventExecutor) throws IOException {
		FileWriter fileWriter = destFileWriters.get(eventExecutor);
		if (fileWriter != null) {
			// the file writer guarantees thread safety for sync
			fileWriter.sync();
		}
	}

	@Override
	public void close() {
		synchronized (this) {
			if (closed) {
				LOG.warn("NoCapacityFileCache has been closed");
				return;
			}
			closed = true;
		}

		for (FileWriter fileWriter : destFileWriters.values()) {
			dfsFileManager.closeFileWriter(fileWriter);
		}
		destFileWriters.clear();
		LOG.info("NoCapacityFileCache is closed");
	}

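	/**
	 * Writes the given data page with the file writer, retrying on IO failures,
	 * then updates the page address and adjusts the file references for the new
	 * and old addresses.
	 */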
	private void internalAddPage(
		FileManager fileManager,
		FileWriter fileWriter,
		PageAddress pageAddress,
		DataPage dataPage,
		GRegionContext gRegionContext,
		boolean isLocal,
		boolean flushForce
	) throws Exception {
		long address;
		long startTime = System.nanoTime();
		int onDiskLen;
		int numRetries = 0;
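		// retry the write on IOException, giving up after repeated failures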
		while (true) {
			try {
				address = fileWriter.getAddress();
				onDiskLen = dataPage.write(fileWriter,
					gRegionContext.getPageSerdeFlink(),
					pageAddress,
					isLocal ? gRegionContext.getGContext().getFlushWholePageGCompressAlgorithm() : GCompressAlgorithm.None,
					gRegionContext.getGContext().getGConfiguration().isChecksumEnable());
				fileWriter.resetFailCount();
				break;
			} catch (IOException e) {
				fileWriter.increasFailCount();
				++numRetries;
				if (numRetries > 3) {
					LOG.error("internal add page exception: {}, {}", fileWriter, pageAddress, e);
					throw e;
				}
			}
		}

		if (flushForce) {
			fileWriter.flush();
		}
		fileCacheStat.addDFSWrite(dataPage.getSize(), System.nanoTime() - startTime);

		// add the reference for the file used by new address before we check the PageAddress status
		fileManager.incDBReference(address, pageAddress.getDataLen());

		boolean pageValid;
		boolean hasOldAddress = false;
		long oldAddress = 0;
		synchronized (pageAddress) {
			// TODO DFS does not support compression yet; this has a known bug.
			pageAddress.afterFlush(onDiskLen, gRegionContext.getGContext().getFlushWholePageGCompressAlgorithm());
			pageValid = pageAddress.isPageValid();
			if (pageValid) {
				hasOldAddress = isLocal ? pageAddress.isLocalValid() : pageAddress.isDfsValid();
				oldAddress = isLocal ? pageAddress.getLocalAddress() : pageAddress.getDfsAddress();
			}
			// we always set the address no matter whether the page is discarded
			if (isLocal) {
				pageAddress.setLocalAddress(address);
				pageAddress.setLocalStatus(true);
			} else {
				pageAddress.setDfsAddress(address);
				pageAddress.setDfsStatus(true);
			}
		}

		// deal with reference only when the page is not discarded
		if (pageValid) {
			// if old address exists, we need to dereference the file it used
			if (hasOldAddress) {
				// Decrementing the reference may lead to deleting the file, but the file may still be in use:
				// 1. read by the main thread
				//    In this case, we use the access number to delay the deletion. Because this page
				//    is not discarded, it's important to set the new address before getting the access
				//    number so that the old address will not be used by the next access.
				// 2. read by split or compaction
				//    In this case, getPage will retry so that the new address is used
				//    to read the data again.
				fileManager.decDBReference(
					oldAddress,
					gContext.getAccessNumber(),
					System.currentTimeMillis(),
					pageAddress.getDataLen());
			}
		} else {
			// if the page is not valid, we should dereference the file used by the new address.
			fileManager.decDBReference(
				address,
				gContext.getAccessNumber(),
				System.currentTimeMillis(),
				pageAddress.getDataLen());
		}
	}

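	/**
	 * Reads a data page with the file reader for the current address. The address
	 * may be replaced concurrently, so failed reads are retried with the latest address.
	 */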
	private DataPage getDataPage(
		FileManager fileManager,
		GRegionContext gRegionContext,
		Callable<Long> addressCallable,
		PageAddress pageAddress
	) throws Exception {
		int unexpectedTries = 0;
		int expectedTries = 0;
		DataPage dataPage;
		long address = addressCallable.call();
		while (true) {
			try {
				FileReader fileReader = fileManager.getFileReader(address);
				long offset = fileManager.getFileOffset(address);
				long startTime = System.nanoTime();
				dataPage = fileManager.getDataPageUtil().getDataPageFromReader(gRegionContext.getPageSerdeFlink(),
					fileReader,
					(int) offset,
					pageAddress);
				fileCacheStat.addDFSRead(pageAddress.getOnDiskDataLen(), System.nanoTime() - startTime);
				return dataPage;
			} catch (Exception e) {
				long oldAddress = address;
				// the address may be replaced, and we should update it every time
				address = addressCallable.call();
				// if the address was not updated, the failure is unexpected; otherwise it is
				// expected, because the address was replaced concurrently
				if (oldAddress == address) {
					unexpectedTries += 1;
				} else {
					expectedTries += 1;
				}
				if (unexpectedTries >= 3 || expectedTries >= 10) {
					LOG.error("get page failed, tried {} times unexpectedly and {} times expectedly",
						unexpectedTries, expectedTries, e);
					throw e;
				}
			}
		}
	}

	/**
	 * This is executed in the event executor, so the file writer for an
	 * event executor will not be created concurrently.
	 */
	private FileWriter getOrCreateFileWriter(
		Map<EventExecutor, FileWriter> fileWriterMap,
		FileManager fileManager,
		EventExecutor eventExecutor) {
		if (closed) {
			throw new GeminiRuntimeException("NoCapacityFileCache has been closed");
		}

		FileWriter fileWriter = fileWriterMap.get(eventExecutor);
		if (fileWriter != null && (!fileWriter.isValid() || fileWriter.size() >= maxFileSize)) {
			fileManager.closeFileWriter(fileWriter);
			fileWriterMap.remove(eventExecutor);
			LOG.info("close file writer {} (valid={}) in {}", fileWriter.getFileID(), fileWriter.isValid(), eventExecutor);
			fileWriter = null;
		}
		if (fileWriter == null) {
			fileWriter = fileManager.createNewFileWriter();
			fileWriterMap.put(eventExecutor, fileWriter);
			LOG.info("create new file writer {} in {}", fileWriter.getFileID(), eventExecutor);
		}
		return fileWriter;
	}
}