/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.filecache;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.filecompaction.FileCompactionPageTransfer;
import org.apache.flink.runtime.state.gemini.engine.fs.FileManager;
import org.apache.flink.runtime.state.gemini.engine.fs.FileReader;
import org.apache.flink.runtime.state.gemini.engine.fs.FileWriter;
import org.apache.flink.runtime.state.gemini.engine.metrics.FileCacheMetrics;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddress;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddressSingleImpl;
import org.apache.flink.runtime.state.gemini.engine.rm.GByteBuffer;
import org.apache.flink.util.Preconditions;

import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutor;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiConsumer;

/**
 * An implementation of {@link FileCache} with no capacity.
 * TODO need to support referencing and reading new data after the DFS input stream has been opened.
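 *
 * <p>A minimal usage sketch; the {@code gContext}, {@code dfsFileManager}, {@code pageAddress},
 * {@code regionContext} and {@code eventExecutor} objects are assumed to be set up elsewhere and
 * are only illustrative here. Pages are written straight to DFS and read back from it; nothing
 * is cached locally.
 * <pre>{@code
 * NoFileCache fileCache = new NoFileCache(gContext, dfsFileManager);
 * // the write happens asynchronously on the given event executor
 * fileCache.addPage(pageAddress, regionContext, eventExecutor,
 *     (success, error) -> { /* handle the asynchronous flush result *&#47; });
 * // once the DFS address is valid, the page can be read back from DFS
 * GByteBuffer page = fileCache.getPage(pageAddress, regionContext, eventExecutor);
 * page.release();
 * fileCache.sync();
 * fileCache.close();
 * }</pre>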
 */
public class NoFileCache extends FileCache implements FileCompactionPageTransfer {

	private static final Logger LOG = LoggerFactory.getLogger(NoFileCache.class);

	private final GContext gContext;

	private final long maxFileSize;

	private final boolean syncWhenBatchFlush;

	/**
	 * File manager for destination storage. It can be null if checkpointing is disabled.
	 */
	private final FileManager dfsFileManager;

	private volatile boolean closed;

	/**
	 * Each event executor has its own file writer.
	 */
	private Map<EventExecutor, FileWriter> dfsFileWriters;

	public NoFileCache(GContext gContext, FileManager dfsFileManager) {
		super(0, new FileCacheStat());

		this.gContext = Preconditions.checkNotNull(gContext);
		this.maxFileSize = gContext.getGConfiguration().getMaxFileSize();
		this.syncWhenBatchFlush = gContext.getGConfiguration().isSnapshotSyncWhenBatchFlush();

		this.dfsFileManager = Preconditions.checkNotNull(dfsFileManager);
		this.dfsFileWriters = new ConcurrentHashMap<>();

		FileCacheMetrics fileCacheMetrics = gContext.getFileCacheMetrics();
		if (fileCacheMetrics != null) {
			fileCacheMetrics.register(fileCacheStat);
		}

		this.closed = false;

		LOG.info("NoFileCache created, DestFileManager {}", dfsFileManager);
	}

	@VisibleForTesting
	public FileManager getDfsFileManager() {
		return dfsFileManager;
	}

	// implementation for file cache =======================================================

	@Override
	public boolean isCached(PageAddress pageAddress) {
		return pageAddress.isDfsValid();
	}

	@Override
	public void addPage(
		PageAddress pageAddress,
		GRegionContext gRegionContext,
		EventExecutor eventExecutor,
		BiConsumer<Boolean, Throwable> callBack) {
		flushPage(pageAddress, gRegionContext, eventExecutor, false, callBack);
	}

	@Override
	public GByteBuffer getPage(
		PageAddress pageAddress, GRegionContext gRegionContext, EventExecutor eventExecutor) {
		Preconditions.checkState(pageAddress.isDfsValid(), "dfs address should be valid");
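		// there is no local file cache, so every read goes directly to DFS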
		try {
			GByteBuffer dataPage = getGByteBuffer(pageAddress);
			Preconditions.checkNotNull(dataPage, "no page exists on dfs");
			return dataPage;
		} catch (Exception e) {
			LOG.error("Exception when getting page.", e);
			throw new GeminiRuntimeException("Exception when getting page", e);
		}
	}

	@Override
	public void discardPage(
		PageAddress pageAddress, GRegionContext gRegionContext, EventExecutor eventExecutor) {
		Preconditions.checkArgument(pageAddress instanceof PageAddressSingleImpl);
		boolean pageValid;
		boolean dfsValid = false;
		long dfsAddress = -1;
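		// update the page status and read the DFS address atomically under the page's own lock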
		synchronized (pageAddress) {
			pageValid = pageAddress.isPageValid();
			if (pageValid) {
				pageAddress.setPageStatus(false);
				dfsValid = pageAddress.isDfsValid();
				if (dfsValid) {
					dfsAddress = pageAddress.getDfsAddress();
				}
			}
		}
		if (pageValid && dfsValid) {
			long accessNumber = gContext.getAccessNumber();
			long ts = System.currentTimeMillis();
			// Decrementing the reference may lead to file deletion, but the file may still be in use:
			// 1. read by the main thread
			//    In this case, we use the access number to delay the deletion.
			// 2. read by split or compaction
			//    discardPage is generally called in the region thread, which ensures the page
			//    will not be used by split or compaction after it is discarded.
			dfsFileManager.decDBReference(dfsAddress, accessNumber, ts, pageAddress.getDataLen());
		}
	}

	@Override
	public void flushPage(
		PageAddress pageAddress,
		GRegionContext gRegionContext,
		EventExecutor eventExecutor,
		boolean force,
		BiConsumer<Boolean, Throwable> callBack) {
		if (!force && pageAddress.isDfsValid()) {
			if (callBack != null) {
				callBack.accept(true, null);
			}
			return;
		}

		// add reference before thread runs.
		eventExecutor.execute(() -> {
			boolean success = false;
			Throwable throwable = null;
			boolean pageIsNull = false;
			GByteBuffer gByteBuffer = pageAddress.getGByteBufferWithReference();
			try {
				// recheck whether to flush
				if (force || !pageAddress.isDfsValid()) {
					if (gByteBuffer == null) {
						pageIsNull = true;
						if (pageAddress.isDfsValid()) {
							// this may happen during data migration or the first snapshot after rescaling
							gByteBuffer = getGByteBuffer(pageAddress);
						}
					}
					Preconditions.checkNotNull(gByteBuffer, "Data page is null");
					FileWriter fileWriter = getOrCreateFileWriter(dfsFileWriters, dfsFileManager, eventExecutor);
					internalAddPage(dfsFileManager, fileWriter, pageAddress, gByteBuffer,
						gRegionContext, false, true);
				}
				success = true;
			} catch (Exception e) {
				success = false;
				throwable = e;
				LOG.error("Error when adding page to cache: pageIsNull={}", pageIsNull, e);
			} finally {
				if (gByteBuffer != null) {
					gByteBuffer.release();
				}
				if (callBack != null) {
					callBack.accept(success, throwable);
				}
			}
		});
	}

	@Override
	public void addBatchPages(
		List<PageAddress> pages,
		List<GRegionContext> gRegionContext,
		EventExecutor eventExecutor,
		List<BiConsumer<Boolean, Throwable>> callBacks) {
		flushBatchPages(pages, gRegionContext, eventExecutor, false, false, callBacks);
	}

	@Override
	public void flushBatchPages(
		List<PageAddress> pages,
		List<GRegionContext> gRegionContexts,
		EventExecutor eventExecutor,
		boolean force,
		boolean flushLocal,
		List<BiConsumer<Boolean, Throwable>> callBacks) {

		if (pages.isEmpty()) {
			return;
		}
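		// the whole batch is written through a single file writer owned by this executor;
		// new DFS addresses are only published after the writer has been flushed or synced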
		eventExecutor.execute(() -> {
			boolean success = true;
			Throwable throwable = null;
			try {
				int size = pages.size();
				// TODO reuse list
				List<Long> addressList = new ArrayList<>(pages.size());
				FileWriter fileWriter = getOrCreateFileWriter(dfsFileWriters, dfsFileManager, eventExecutor);
				for (int i = 0; i < size; i++) {
					PageAddress page = pages.get(i);
					if (force || !page.isDfsValid()) {
						GByteBuffer buffer = page.getGByteBufferWithReference();
						try {
							if (buffer == null) {
								buffer = getGByteBuffer(page);
							}
							if (buffer != null) {
								// write page to dfs
								long address = writePage(dfsFileManager, fileWriter, page, buffer,
									gRegionContexts.get(i), false);
								addressList.add(address);
							} else {
								throw new GeminiRuntimeException("data page does not exist");
							}
						} finally {
							// release buffer as soon as possible
							if (buffer != null) {
								buffer.release();
							}
						}
					} else {
						// a null address indicates there is no need to update the page
						addressList.add(null);
					}
				}

				// flush to ensure data can be read immediately after addresses are updated
				if (syncWhenBatchFlush) {
					fileWriter.sync();
				} else {
					fileWriter.flush();
				}

				long accessNumber = gRegionContexts.get(0).getGContext().getAccessNumber();
				// update file references; no exception is expected to happen here
				for (int i = 0; i < size; i++) {
					Long address = addressList.get(i);
					// skip pages that do not have a new address
					if (address != null) {
						updatePageAddress(dfsFileManager, pages.get(i), address, false, accessNumber);
					}
				}
			} catch (Exception e) {
				success = false;
				throwable = new FlushBatchPageException(e);
			} finally {
				// execute callbacks for all pages
				for (BiConsumer<Boolean, Throwable> callBack : callBacks) {
					if (callBack != null) {
						callBack.accept(success, throwable);
					}
				}
			}
		});
	}

	@Override
	public void sync() throws IOException {
		// sync dfs data
		for (FileWriter fileWriter : dfsFileWriters.values()) {
			fileWriter.sync();
		}
	}

	@Override
	public FileCacheType getFileCacheType() {
		return FileCacheType.NONE;
	}

	// implementation for page transfer =======================================================

	@Override
	public FileManager getDbFileManager() {
		return dfsFileManager;
	}

	@Override
	public boolean hasDbFileAddress(PageAddress pageAddress) {
		return pageAddress.isPageValid() && pageAddress.isDfsValid();
	}

	@Override
	public int getDbFileId(PageAddress pageAddress) {
		return dfsFileManager.getSimpleFileID(pageAddress.getDfsAddress());
	}

	@Override
	public void transferPage(
		PageAddress pageAddress,
		GRegionContext gRegionContext,
		EventExecutor eventExecutor,
		@Nullable BiConsumer<Boolean, Throwable> callBack) {

		// FIXME file compaction is not started from the region executor, so there is
		// no point in getting the byte buffer outside of the executor. Do not enable
		// file compaction in off-heap mode.
		eventExecutor.execute(() -> {
			boolean success = false;
			Throwable throwable = null;
			GByteBuffer buffer = null;
			try {
				// TODO there is some work left to complete:
				// 1. the mechanism for data page references in off-heap mode is not suitable
				// for this case; this is expected to be solved in [BLINK-21500417], so
				// currently file compaction can only be enabled in on-heap mode
				// 2. if the page is not in memory, we load it, build a DataPage and write it
				// again. Actually there is no need to build a DataPage, as the input byte
				// stream could be output directly. If compression is enabled, a bit more
				// work is needed, so we will do it after rebasing the compression code.

				// recheck whether the dfs address is valid
				if (pageAddress.isPageValid() && pageAddress.isDfsValid()) {
					buffer = pageAddress.getGByteBufferWithReference();
					if (buffer == null) {
						long dfsAddress = pageAddress.getDfsAddress();
						FileReader fileReader = dfsFileManager.getFileReader(dfsAddress);
						long offset = dfsFileManager.getFileOffset(dfsAddress);
						buffer = dfsFileManager.getDataPageUtil().getDataPageFromReader(
							fileReader,
							(int) offset,
							pageAddress);
					}
					FileWriter fileWriter = getOrCreateFileWriter(dfsFileWriters,
						dfsFileManager, eventExecutor);
					internalAddPage(dfsFileManager, fileWriter, pageAddress, buffer,
						gRegionContext, false, true);
					success = true;
				}
			} catch (Exception e) {
				throwable = e;
			} finally {
				if (buffer != null) {
					buffer.release();
				}

				if (callBack != null) {
					callBack.accept(success, throwable);
				}
			}

		});
	}

	@Override
	public void close() {
		synchronized (this) {
			if (closed) {
				LOG.warn("NoFileCache has been closed");
				return;
			}
			closed = true;
		}

		for (FileWriter fileWriter : dfsFileWriters.values()) {
			dfsFileManager.closeFileWriter(fileWriter);
		}
		dfsFileWriters.clear();
		LOG.info("NoFileCache is closed");
	}

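	/**
	 * Reads the page bytes back from DFS. The read is retried on failure because the
	 * underlying file may be replaced concurrently (e.g. by file compaction).
	 */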
	private GByteBuffer getGByteBuffer(PageAddress pageAddress) throws Exception {
		Preconditions.checkArgument(pageAddress instanceof PageAddressSingleImpl);
		int unexpectedTries = 0;
		int expectedTries = 0;

		GByteBuffer gByteBuffer = null;
		long address = pageAddress.getDfsAddress();
		while (true) {
			try {
				FileReader fileReader = dfsFileManager.getFileReader(address);
				long offset = dfsFileManager.getFileOffset(address);
				long startTime = System.nanoTime();
				gByteBuffer = dfsFileManager.getDataPageUtil().getDataPageFromReader(fileReader, (int) offset, pageAddress);
				fileCacheStat.addDFSRead(pageAddress.getDataLen(), System.nanoTime() - startTime);
				return gByteBuffer;
			} catch (Exception e) {
				if (gByteBuffer != null) {
					gByteBuffer.release();
				}
				long oldAddress = address;
				// the address may have been replaced, so refresh it on every retry
				address = pageAddress.getDfsAddress();
				// a retry counts as expected only when the address has been updated in the meantime
				if (oldAddress == address) {
					unexpectedTries += 1;
				} else {
					expectedTries += 1;
				}
				if (unexpectedTries >= 3 || expectedTries >= 10) {
					LOG.error("Failed to get page after " + unexpectedTries + " unexpected retries and " +
						expectedTries + " expected retries.", e);
					throw e;
				}
			} finally {
				gByteBuffer = null;
			}
		}
	}

	/**
	 * This is executed in the event executor, so the file writer for an
	 * event executor will not be created concurrently.
	 */
	private FileWriter getOrCreateFileWriter(
		Map<EventExecutor, FileWriter> fileWriterMap, FileManager fileManager, EventExecutor eventExecutor) {
		if (closed) {
			throw new GeminiRuntimeException("NoFileCache has been closed");
		}

		FileWriter fileWriter = fileWriterMap.get(eventExecutor);
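		// roll over to a new file when the current writer has become invalid
		// or has reached the configured maximum file size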
		if (fileWriter != null && (!fileWriter.isValid() || fileWriter.size() >= maxFileSize)) {
			fileManager.closeFileWriter(fileWriter);
			fileWriterMap.remove(eventExecutor);
			LOG.info("close file writer {}/{} in {}", fileWriter.getFileID(), fileWriter.isValid(), eventExecutor);
			fileWriter = null;
		}
		if (fileWriter == null) {
			fileWriter = fileManager.createNewFileWriter();
			fileWriterMap.put(eventExecutor, fileWriter);
			LOG.info("create new file writer {} in {}", fileWriter.getFileID(), eventExecutor);
		}
		return fileWriter;
	}
}