// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state.gemini.engine.filecache;
import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.fs.FileManager;
import org.apache.flink.runtime.state.gemini.engine.fs.FileReader;
import org.apache.flink.runtime.state.gemini.engine.fs.FileWriter;
import org.apache.flink.runtime.state.gemini.engine.metrics.FileCacheMetrics;
import org.apache.flink.runtime.state.gemini.engine.page.DataPage;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddress;
import org.apache.flink.runtime.state.gemini.engine.page.compress.GCompressAlgorithm;
import org.apache.flink.runtime.state.gemini.engine.rm.ReferenceCount.ReleaseType;
import org.apache.flink.util.Preconditions;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiConsumer;
/**
 * An implementation of {@link FileCache} with infinite capacity. Pages added
 * via {@link #addPage} will never be flushed to destination storage.
 */
public class InfiniteCapacityFileCache extends FileCache {
private static final Logger LOG = LoggerFactory.getLogger(InfiniteCapacityFileCache.class);

/** Engine context; supplies the configuration, the file cache metrics and the access number. */
private final GContext gContext;

/** Size at or above which an existing file writer is closed and replaced by a new one. */
private final long maxFileSize;

/**
 * File manager for cache.
 */
private final FileManager localFileManager;

/**
 * Local file writers keyed by event executor: each event executor has its own file writer.
 */
private final Map<EventExecutor, FileWriter> localFileWriters;

/**
 * File manager for destination storage.
 */
private final FileManager dfsFileManager;

/**
 * DFS file writers keyed by event executor: each event executor has its own file writer.
 */
private final Map<EventExecutor, FileWriter> dfsFileWriters;

/** Set by {@link #close()}; checked before a file writer is looked up or created. */
private volatile boolean closed;
/**
 * Creates a file cache whose capacity is reported as {@link Long#MAX_VALUE}.
 *
 * @param gContext engine context; must not be null and must be configured with a positive
 *                 max log-structure file size
 * @param localFileManager file manager for the cache; must not be null
 * @param dfsFileManager file manager for destination storage; must not be null
 */
public InfiniteCapacityFileCache(
        GContext gContext,
        FileManager localFileManager,
        FileManager dfsFileManager) {
    super(Long.MAX_VALUE, new FileCacheStat());

    // validate and store the collaborators in the order the arguments are declared
    this.gContext = Preconditions.checkNotNull(gContext);
    this.maxFileSize = gContext.getGConfiguration().getMaxLogStructureFileSize();
    Preconditions.checkArgument(maxFileSize > 0, "Max file size should be positive");
    this.localFileManager = Preconditions.checkNotNull(localFileManager);
    this.dfsFileManager = Preconditions.checkNotNull(dfsFileManager);

    // one writer per event executor, created lazily on first use
    this.localFileWriters = new ConcurrentHashMap<>();
    this.dfsFileWriters = new ConcurrentHashMap<>();

    // metrics are optional: only register the statistics when a metrics object exists
    final FileCacheMetrics metrics = gContext.getFileCacheMetrics();
    if (metrics != null) {
        metrics.register(fileCacheStat);
    }

    this.closed = false;
    LOG.info("InfiniteCapacityFileCache created, LocalFileManager {}, DfsFileManager {}", localFileManager, dfsFileManager);
}
/**
 * Tells whether the page is cached, which for this implementation is exactly
 * when the local address of the page is valid.
 *
 * @param pageAddress address of the page to check
 * @return {@code true} if the local address is valid
 */
@Override
public boolean isCached(PageAddress pageAddress) {
    final boolean presentLocally = pageAddress.isLocalValid();
    return presentLocally;
}
/**
 * Writes the page to the local cache asynchronously on {@code flushEventExecutor}.
 *
 * <p>If the local address is already valid, {@code callBack} (when non-null) is invoked
 * right away with {@code (true, null)} and nothing is submitted. Otherwise the submitted
 * task re-checks the local status, loads the page from destination storage when it is not
 * available in memory but the DFS address is valid, writes it with the executor's local
 * file writer, and finally reports the outcome to {@code callBack}.
 *
 * @param pageAddress address of the page to cache
 * @param gRegionContext region context used when reading and writing the page
 * @param flushEventExecutor executor that runs the write; also the key of the file writer
 * @param callBack optional; invoked with {@code (success, throwable)} where
 *                 {@code throwable} is the caught exception or {@code null}
 */
@Override
public void addPage(
        PageAddress pageAddress,
        GRegionContext gRegionContext,
        EventExecutor flushEventExecutor,
        BiConsumer callBack) {
    if (pageAddress.isLocalValid()) {
        if (callBack != null) {
            callBack.accept(true, null);
        }
        return;
    }
    //add reference before thread runs.
    final DataPage oriDataPage = pageAddress.getDataPage();
    flushEventExecutor.submit(() -> {
        boolean success = false;
        Throwable throwable = null;
        DataPage dataPage = oriDataPage;
        try {
            // recheck: the page may have been cached between submission and execution
            if (!pageAddress.isLocalValid()) {
                if (dataPage == null && pageAddress.isDfsValid()) {
                    // for infinite cache, the if will be true in the following case
                    // 1. local recovery is enabled
                    // 2. restore from a cp located on the dfs
                    // 3. a local snapshot is started, but some pages are neither in memory and local
                    dataPage = getDataPage(dfsFileManager, gRegionContext,
                        pageAddress::getDfsAddress, pageAddress, false);
                }
                if (dataPage != null) {
                    FileWriter fileWriter = getOrCreateFileWriter(localFileWriters,
                        localFileManager,
                        flushEventExecutor);
                    internalAddPage(localFileManager, fileWriter, pageAddress, dataPage, gRegionContext, true, true);
                } else {
                    throw new GeminiRuntimeException("data page does not exist");
                }
            }
            success = true;
        } catch (Exception e) {
            success = false;
            throwable = e;
            // pass the message for the placeholder and the exception itself as the
            // trailing argument, otherwise "{}" is printed literally
            LOG.error("error when adding page to cache: {}", e.getMessage(), e);
        } finally {
            // release the page whether it came from memory or was read from dfs
            if (dataPage != null) {
                dataPage.delReferenceCount(ReleaseType.Normal);
            }
            if (callBack != null) {
                callBack.accept(success, throwable);
            }
        }
    });
}
/**
 * Reads the page from the local cache if its local address is valid, otherwise from
 * destination storage if its DFS address is valid.
 *
 * <p>A page read from destination storage is additionally handed to
 * {@code flushEventExecutor} to be written to the local cache; a failure of that
 * background write is only logged. Hit/miss sizes are recorded in the cache statistics.
 *
 * @param pageAddress address of the page to read
 * @param gRegionContext region context used when reading and writing the page
 * @param flushEventExecutor executor used to cache a page that was read from DFS
 * @return the page, never {@code null}
 * @throws GeminiRuntimeException if the page cannot be read or exists in neither place
 */
@Override
public DataPage getPage(
        PageAddress pageAddress,
        GRegionContext gRegionContext,
        EventExecutor flushEventExecutor) {
    try {
        DataPage dataPage = null;
        if (pageAddress.isLocalValid()) {
            dataPage = getDataPage(localFileManager, gRegionContext,
                pageAddress::getLocalAddress, pageAddress, true);
            if (dataPage != null) {
                fileCacheStat.addHitSize(dataPage.getSize());
            }
        } else if (pageAddress.isDfsValid()) {
            dataPage = getDataPage(dfsFileManager, gRegionContext,
                pageAddress::getDfsAddress, pageAddress, false);
            if (dataPage != null) {
                // after restore, we need to cache the data
                final DataPage cacheDataPage = dataPage;
                // extra reference for the background task, released in its finally block
                cacheDataPage.addReferenceCount();
                flushEventExecutor.submit(() -> {
                    try {
                        FileWriter fileWriter = getOrCreateFileWriter(localFileWriters, localFileManager, flushEventExecutor);
                        internalAddPage(localFileManager,
                            fileWriter,
                            pageAddress,
                            cacheDataPage,
                            gRegionContext,
                            true,
                            true);
                    } catch (Exception e) {
                        // message fills the placeholder, the exception supplies the stack trace
                        LOG.error("cache data failed, {}", e.getMessage(), e);
                    } finally {
                        cacheDataPage.delReferenceCount(ReleaseType.Normal);
                    }
                });
                fileCacheStat.addMissSize(dataPage.getSize());
            }
        }
        Preconditions.checkNotNull(dataPage, "no page exists on local and dfs");
        return dataPage;
    } catch (Exception e) {
        // message fills the placeholder, the exception supplies the stack trace
        LOG.error("exception when get page, {}", e.getMessage(), e);
        throw new GeminiRuntimeException("exception when get page", e);
    }
}
/**
 * Marks the page as invalid and drops the references it holds on the local
 * and DFS files. Does nothing when the page is already invalid.
 *
 * @param pageAddress address of the page to discard
 * @param gRegionContext not used by this implementation
 * @param eventExecutor not used by this implementation
 */
@Override
public void discardPage(
        PageAddress pageAddress,
        GRegionContext gRegionContext,
        EventExecutor eventExecutor) {
    final boolean hadLocalCopy;
    final boolean hadDfsCopy;
    final long localAddr;
    final long dfsAddr;
    // snapshot the address state and flip the page status under the page's monitor
    synchronized (pageAddress) {
        if (!pageAddress.isPageValid()) {
            return;
        }
        pageAddress.setPageStatus(false);
        hadLocalCopy = pageAddress.isLocalValid();
        hadDfsCopy = pageAddress.isDfsValid();
        localAddr = hadLocalCopy ? pageAddress.getLocalAddress() : -1;
        dfsAddr = hadDfsCopy ? pageAddress.getDfsAddress() : -1;
    }

    final long accessNumber = gContext.getAccessNumber();
    final long now = System.currentTimeMillis();
    // decrement reference may lead to delete file, but it may be used somewhere
    // 1. read by main thread
    //    In this case, we use access number to delay the deletion.
    // 2. read by split or compaction
    //    generally discardPage is called in region thread, and it will ensure the page
    //    will not be used by split and compaction after discard
    if (hadLocalCopy) {
        localFileManager.decDBReference(localAddr, accessNumber, now, pageAddress.getDataLen());
    }
    if (hadDfsCopy) {
        dfsFileManager.decDBReference(dfsAddr, accessNumber, now, pageAddress.getDataLen());
    }
}
/**
 * Writes the page to destination storage asynchronously on {@code eventExecutor}.
 *
 * <p>Unless {@code force} is set, a page whose DFS address is already valid is not
 * written again: {@code callBack} (when non-null) is invoked with {@code (true, null)}.
 * Otherwise the submitted task re-checks that condition, loads the page from the local
 * cache or from destination storage when it is not available in memory, writes it with
 * the executor's DFS file writer and reports the outcome to {@code callBack}. A page that
 * had to be read from destination storage is also scheduled to be written to the local cache.
 *
 * @param pageAddress address of the page to flush
 * @param gRegionContext region context used when reading and writing the page
 * @param eventExecutor executor that runs the write; also the key of the file writer
 * @param force flush even if the DFS address is already valid
 * @param callBack optional; invoked with {@code (success, throwable)} where
 *                 {@code throwable} is the caught exception or {@code null}
 */
@Override
public void flushPage(
        PageAddress pageAddress,
        GRegionContext gRegionContext,
        EventExecutor eventExecutor,
        boolean force,
        BiConsumer callBack) {
    if (!force && pageAddress.isDfsValid()) {
        if (callBack != null) {
            callBack.accept(true, null);
        }
        return;
    }
    //add reference before thread runs.
    final DataPage oriDataPage = pageAddress.getDataPage();
    eventExecutor.submit(() -> {
        boolean success = false;
        Throwable throwable = null;
        boolean pageIsNull = false;
        DataPage dataPage = oriDataPage;
        try {
            // recheck whether to flush
            if (force || !pageAddress.isDfsValid()) {
                if (dataPage == null) {
                    // remembered only for the error log below
                    pageIsNull = true;
                    if (pageAddress.isLocalValid()) {
                        dataPage = getDataPage(localFileManager, gRegionContext,
                            pageAddress::getLocalAddress, pageAddress, true);
                    } else if (pageAddress.isDfsValid()) {
                        // this may happen when data migration or snapshot for the first time after rescale
                        dataPage = getDataPage(dfsFileManager, gRegionContext,
                            pageAddress::getDfsAddress, pageAddress, false);
                        if (dataPage != null) {
                            //TODO why null?
                            // after restore, we need to cache the data
                            final DataPage cacheDataPage = dataPage;
                            // extra reference for the background task, released in its finally block
                            cacheDataPage.addReferenceCount();
                            eventExecutor.submit(() -> {
                                try {
                                    //TODO here actually need flushEvent to write data to localCacheDisk.
                                    FileWriter fileWriter = getOrCreateFileWriter(localFileWriters, localFileManager, eventExecutor);
                                    internalAddPage(localFileManager,
                                        fileWriter,
                                        pageAddress,
                                        cacheDataPage,
                                        gRegionContext,
                                        true,
                                        true);
                                } catch (Exception e) {
                                    // message fills the placeholder, the exception supplies the stack trace
                                    LOG.error("cache data failed, {}", e.getMessage(), e);
                                } finally {
                                    cacheDataPage.delReferenceCount(ReleaseType.Normal);
                                }
                            });
                        }
                    }
                }
                Preconditions.checkNotNull(dataPage, "Data page is null");
                FileWriter fileWriter = getOrCreateFileWriter(dfsFileWriters, dfsFileManager, eventExecutor);
                internalAddPage(dfsFileManager, fileWriter, pageAddress, dataPage, gRegionContext, false, false);
            }
            success = true;
        } catch (Exception e) {
            success = false;
            throwable = e;
            LOG.error("error when adding page to cache: pageIsNull={}, {}", pageIsNull, e.getMessage(), e);
        } finally {
            // release the page whether it came from memory or was read from a file
            if (dataPage != null) {
                dataPage.delReferenceCount(ReleaseType.Normal);
            }
            if (callBack != null) {
                callBack.accept(success, throwable);
            }
        }
    });
}
/**
 * Syncs the DFS file writer that belongs to the given event executor, if there is one.
 *
 * @param eventExecutor executor whose DFS file writer should be synced
 * @throws IOException if the file writer fails to sync
 */
@Override
public void sync(EventExecutor eventExecutor) throws IOException {
    final FileWriter dfsWriter = dfsFileWriters.get(eventExecutor);
    if (dfsWriter == null) {
        // no DFS file writer is registered for this executor, nothing to sync
        return;
    }
    // file writer guarantee the thread safe for sync
    dfsWriter.sync();
}
/**
 * Closes all local and DFS file writers and marks the cache as closed.
 * A second call only logs a warning.
 *
 * @throws IOException declared by the overridden method
 */
@Override
public void close() throws IOException {
    // flip the flag under the lock so that only the first caller proceeds
    synchronized (this) {
        if (closed) {
            LOG.warn("InfiniteCapacityFileCache has been closed");
            return;
        }
        closed = true;
    }
    // DB should guarantee write will not happen after close is called.
    for (FileWriter fileWriter : localFileWriters.values()) {
        localFileManager.closeFileWriter(fileWriter);
    }
    localFileWriters.clear();
    for (FileWriter fileWriter : dfsFileWriters.values()) {
        dfsFileManager.closeFileWriter(fileWriter);
    }
    dfsFileWriters.clear();
    LOG.info("InfiniteCapacityFileCache is closed");
}
/**
 * Writes the page with the given file writer and publishes the new address in
 * {@code pageAddress}.
 *
 * <p>The write is retried on any exception and the exception is rethrown once more than
 * three retries were needed. After a successful write the file behind the new address is
 * referenced, the address is stored in {@code pageAddress}, and then either the file behind
 * the replaced address is dereferenced (page still valid) or the new reference is dropped
 * again (page was discarded in the meantime).
 *
 * @param fileManager file manager owning {@code fileWriter}
 * @param fileWriter writer the page is appended to
 * @param pageAddress address object that is updated with the new location
 * @param dataPage page to write
 * @param gRegionContext region context supplying the serializer and configuration
 * @param isLocal {@code true} for the local cache, {@code false} for destination storage
 * @param flushForce whether to flush the writer after the page was written
 * @throws Exception the last write failure, or a failure of the flush
 */
private void internalAddPage(
        FileManager fileManager,
        FileWriter fileWriter,
        PageAddress pageAddress,
        DataPage dataPage,
        GRegionContext gRegionContext,
        boolean isLocal,
        boolean flushForce) throws Exception {
    long address;
    long startTime = System.nanoTime();
    int diskDataLen;
    int numRetries = 0;
    while (true) {
        try {
            // the page starts at the writer's current address
            address = fileWriter.getAddress();
            diskDataLen = dataPage.write(fileWriter,
                gRegionContext.getPageSerdeFlink(),
                pageAddress,
                isLocal ? gRegionContext.getGContext().getFlushWholePageGCompressAlgorithm() : GCompressAlgorithm.None,
                gRegionContext.getGContext().getGConfiguration().isChecksumEnable());
            fileWriter.resetFailCount();
            break;
        } catch (Exception e) {
            //TODO: #SR add a filter or something else for exceptions.
            fileWriter.increasFailCount();
            ++numRetries;
            if (numRetries > 3) {
                // the trailing throwable is logged as the exception and does not fill a
                // placeholder, so the message is passed explicitly for the third "{}"
                LOG.error("internal add page exception: {}, {}, {}", fileWriter, pageAddress, e.getMessage(), e);
                throw e;
            }
        }
    }
    updateWriteStat(diskDataLen, dataPage.getSize(), System.nanoTime() - startTime, isLocal);
    if (flushForce) {
        fileWriter.flush();
    }
    // add the reference for the file used by new address before we check the PageAddress status
    fileManager.incDBReference(address, pageAddress.getDataLen());
    boolean pageValid;
    boolean hasOldAddress = false;
    long oldAddress = 0;
    synchronized (pageAddress) {
        //TODO DFS now not support compress. HAVE BUG.
        pageAddress.afterFlush(diskDataLen, gRegionContext.getGContext().getFlushWholePageGCompressAlgorithm());
        pageValid = pageAddress.isPageValid();
        if (pageValid) {
            hasOldAddress = isLocal ? pageAddress.isLocalValid() : pageAddress.isDfsValid();
            oldAddress = isLocal ? pageAddress.getLocalAddress() : pageAddress.getDfsAddress();
        }
        // we always set the address no matter whether the page is discarded
        if (isLocal) {
            pageAddress.setLocalAddress(address);
            pageAddress.setLocalStatus(true);
        } else {
            pageAddress.setDfsAddress(address);
            pageAddress.setDfsStatus(true);
        }
    }
    if (pageValid) {
        // if old address exists, we need to dereference the file it used
        if (hasOldAddress) {
            // decrement reference may lead to delete file, but it may be used somewhere
            // 1. read by main thread
            //    In this case, we use access number to delay the deletion. Because this page
            //    is not discarded, so it's important to set new address before getting access
            //    number so that old address will not be used by next access.
            // 2. read by split or compaction
            //    In this case, we will retry in getPage so that we can use the new address
            //    to read data again
            fileManager.decDBReference(
                oldAddress,
                gContext.getAccessNumber(),
                System.currentTimeMillis(),
                pageAddress.getDataLen());
        }
    } else {
        // if page is not valid, we should dereference the file used by the new address.
        fileManager.decDBReference(
            address,
            gContext.getAccessNumber(),
            System.currentTimeMillis(),
            pageAddress.getDataLen());
    }
}
/**
 * Reads the page stored at the address supplied by {@code addressCallable}.
 *
 * <p>When a read fails, the address is fetched again because it may have been replaced
 * in the meantime. A failure with an unchanged address counts as an unexpected try, a
 * failure with a changed address as an expected try. The last exception is rethrown
 * after 3 unexpected or 10 expected tries.
 *
 * @param fileManager file manager that resolves the address to a reader and an offset
 * @param gRegionContext region context supplying the page serializer
 * @param addressCallable supplies the current address of the page; called again after each failure
 * @param pageAddress address object of the page being read
 * @param isLocal whether the read is accounted as a local read or a DFS read
 * @return the page returned by the file manager's data page util
 * @throws Exception the last read failure, or a failure of {@code addressCallable}
 */
private DataPage getDataPage(
        FileManager fileManager,
        GRegionContext gRegionContext,
        Callable<Long> addressCallable,
        PageAddress pageAddress,
        boolean isLocal) throws Exception {
    int unexpectedTries = 0;
    int expectedTries = 0;
    long address = addressCallable.call();
    while (true) {
        try {
            FileReader fileReader = fileManager.getFileReader(address);
            long offset = fileManager.getFileOffset(address);
            long startTime = System.nanoTime();
            DataPage dataPage = fileManager.getDataPageUtil().getDataPageFromReader(gRegionContext.getPageSerdeFlink(),
                fileReader,
                (int) offset,
                pageAddress);
            updateReadStat(pageAddress.getOnDiskDataLen(), System.nanoTime() - startTime, isLocal);
            return dataPage;
        } catch (Exception e) {
            long oldAddress = address;
            // the address may be replaced, and we should update it every time
            address = addressCallable.call();
            // a failure on an unchanged address is unexpected; a failure after the address
            // was replaced is expected and gets a larger retry budget
            if (oldAddress == address) {
                unexpectedTries += 1;
            } else {
                expectedTries += 1;
            }
            if (unexpectedTries >= 3 || expectedTries >= 10) {
                LOG.error("get page failed, try " + unexpectedTries +
                    " times unexpectedly, and try " + expectedTries + " times expectedly, last exception " + e);
                throw e;
            }
        }
    }
}
/**
 * Returns the file writer registered for the event executor, creating one if needed.
 * An existing writer that is no longer valid, or whose size has reached the max file
 * size, is closed and replaced by a new one.
 *
 * <p>This will be executed in the event executor, so file writer for an
 * event executor will not be created concurrently.
 *
 * @param fileWriterMap per-executor writers (local or DFS) to look up and update
 * @param fileManager file manager that creates and closes the writers of that map
 * @param eventExecutor executor whose writer is requested
 * @return a writer registered in {@code fileWriterMap} for {@code eventExecutor}
 * @throws GeminiRuntimeException if the cache has been closed
 */
private FileWriter getOrCreateFileWriter(
        Map<EventExecutor, FileWriter> fileWriterMap,
        FileManager fileManager,
        EventExecutor eventExecutor) {
    if (closed) {
        throw new GeminiRuntimeException("InfiniteCapacityFileCache has been closed.");
    }
    FileWriter fileWriter = fileWriterMap.get(eventExecutor);
    // roll over to a new file when the current writer is unusable or its file is full
    if (fileWriter != null && (!fileWriter.isValid() || fileWriter.size() >= maxFileSize)) {
        fileManager.closeFileWriter(fileWriter);
        fileWriterMap.remove(eventExecutor);
        LOG.debug("close file writer {}/{} in {}", fileWriter.getFileID(), fileWriter.isValid(), eventExecutor);
        fileWriter = null;
    }
    if (fileWriter == null) {
        fileWriter = fileManager.createNewFileWriter();
        fileWriterMap.put(eventExecutor, fileWriter);
        LOG.debug("create new file writer {} in {}", fileWriter.getFileID(), eventExecutor);
    }
    return fileWriter;
}
/**
 * Records a finished write in the cache statistics.
 *
 * @param diskLen length written to disk; only recorded for local writes
 * @param size size of the page
 * @param time duration of the write
 * @param isLocal {@code true} to account a local write, {@code false} for a DFS write
 */
private void updateWriteStat(long diskLen, long size, long time, boolean isLocal) {
    if (!isLocal) {
        fileCacheStat.addDFSWrite(size, time);
        return;
    }
    fileCacheStat.addLocalWrite(diskLen, size, time);
}
/**
 * Records a finished read in the cache statistics.
 *
 * <p>The parameter order matches the call site, which passes the on-disk data length
 * first and the elapsed time second. The parameters were previously declared as
 * {@code (time, size)}, which swapped the two values when they were forwarded.
 * NOTE(review): assumes {@code addLocalRead}/{@code addDFSRead} take {@code (size, time)},
 * like {@code addDFSWrite(size, time)} - confirm against {@code FileCacheStat}.
 *
 * @param size on-disk length of the page that was read
 * @param time duration of the read
 * @param isLocal {@code true} to account a local read, {@code false} for a DFS read
 */
private void updateReadStat(long size, long time, boolean isLocal) {
    if (isLocal) {
        fileCacheStat.addLocalRead(size, time);
    } else {
        fileCacheStat.addDFSRead(size, time);
    }
}
}