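/*
 * NOTE: the two sentences below are boilerplate inserted by the site this file was
 * downloaded from. They are not part of the original source and are kept only as a
 * comment so that the file remains valid Java.
 *
 * Many resources are needed to download a project. Please understand that we have to
 * compensate our server costs. Thank you in advance. Project price only 1 $
 * You can buy this project and download/modify it as often as you want.
 */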
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state.gemini.engine.filecache;
import org.apache.flink.runtime.state.gemini.engine.GConfiguration;
import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.fs.FileManager;
import org.apache.flink.runtime.state.gemini.engine.fs.FileWriter;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddress;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddressSingleImpl;
import org.apache.flink.runtime.state.gemini.engine.rm.GByteBuffer;
import org.apache.flink.util.Preconditions;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutor;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import java.util.function.BiConsumer;
/**
* Cache based on file. Generally, the page will store the data in the cache first,
* and it depends on the implementation when to flush the page to the destination
* storage. {@link #flushPage} can be used to flush the page to the destination storage
* directly. It depends on the implementation whether the methods are blocked on IO.
*/
public abstract class FileCache implements Closeable {
/**
* Type of file cache classified by the capacity.
* The configured type selects the implementation built by {@link #createFileCache}.
*/
public enum FileCacheType {
/**
* There is no capacity.
* {@link #createFileCache} builds a {@code NoFileCache} for this type, which is
* given only the dfs file manager.
*/
NONE,
/**
* There is infinite capacity.
* {@link #createFileCache} builds an {@code InfiniteFileCache} for this type, which is
* given both the local and the dfs file manager.
*/
INFINITE,
/**
* There is limited capacity.
* Not supported currently: {@link #createFileCache} throws a
* {@code GeminiRuntimeException} for this type.
*/
LIMITED
}
/**
* Capacity of the cache. Set once in the constructor; the unit is not stated
* in this file (presumably bytes -- TODO confirm against the implementations).
*/
protected final long capacity;
/**
* Statistics of this cache. Never null (checked in the constructor). Local and DFS
* page writes performed through {@code writePage} are recorded here.
*/
protected final FileCacheStat fileCacheStat;
/**
 * Creates the common part of a file cache.
 *
 * @param capacity capacity of the cache.
 * @param fileCacheStat statistics collector for this cache, must not be null.
 */
public FileCache(long capacity, FileCacheStat fileCacheStat) {
    // Reject a missing statistics object up front; nothing is published if this fails.
    Preconditions.checkNotNull(fileCacheStat);
    this.fileCacheStat = fileCacheStat;
    this.capacity = capacity;
}
/**
 * Returns the capacity this cache was created with.
 *
 * @return the capacity of the cache.
 */
public long capacity() {
    return this.capacity;
}
/**
* Whether the page is cached.
*
* @param pageAddress address of the page to check.
* @return {@code true} if the page is cached, {@code false} otherwise.
*/
public abstract boolean isCached(PageAddress pageAddress);
// NOTE(review): the type arguments of BiConsumer were missing in this copy of the file and
// have been restored. Boolean follows from the Javadoc below; Throwable as the second
// argument is an assumption -- confirm against the implementing classes.
/**
 * Adds the page to the cache. The caller must guarantee that the page is in memory
 * or on dfs. It depends on the implementation whether this method is sync or async;
 * it can use {@code eventExecutor} to be async. {@code callBack}, if given, is run
 * at the end and is delivered a boolean argument which indicates whether the page
 * has been successfully added.
 *
 * @param pageAddress address of page to add.
 * @param gRegionContext context of region this page belongs to.
 * @param eventExecutor executor used to be async.
 * @param callBack code to execute after page is added to cache, may be null.
 */
public abstract void addPage(
    PageAddress pageAddress,
    GRegionContext gRegionContext,
    EventExecutor eventExecutor,
    @Nullable BiConsumer<Boolean, Throwable> callBack);
/**
* Get the page.
*
* @param pageAddress address of page to get.
* @param gRegionContext context of region this page belongs to.
* @param eventExecutor executor handed to the implementation; presumably usable to be
* async as in the other methods of this class -- TODO confirm.
* @return buffer for the requested page. Whether it may be null (e.g. for a page that
* cannot be read) is not specified here -- TODO confirm against the implementations.
*/
public abstract GByteBuffer getPage(
PageAddress pageAddress,
GRegionContext gRegionContext,
EventExecutor eventExecutor);
/**
* Discard the page from the cache and destination storage.
* It depends on the implementation whether this method is
* sync or async. It can use {@code eventExecutor} to be async.
*
* @param pageAddress address of page to discard.
* @param gRegionContext context of region this page belongs to.
* @param eventExecutor executor used to be async.
*/
public abstract void discardPage(
PageAddress pageAddress,
GRegionContext gRegionContext,
EventExecutor eventExecutor);
// NOTE(review): the type arguments of BiConsumer were missing in this copy of the file and
// have been restored. Boolean follows from the Javadoc below; Throwable as the second
// argument is an assumption -- confirm against the implementing classes.
/**
 * Flushes the page to the destination storage without caching. It depends on the
 * implementation whether this method is sync or async; it can use
 * {@code eventExecutor} to be async. If {@code force} is true, the page will always
 * be flushed no matter whether it has been flushed before, and the old address is
 * replaced. {@code callBack}, if given, is run at the end and is delivered a boolean
 * argument which indicates whether the page has been successfully flushed.
 *
 * @param pageAddress address of page to flush.
 * @param gRegionContext context of region this page belongs to.
 * @param eventExecutor executor used to be async.
 * @param force whether to force to flush.
 * @param callBack code to execute after page is flushed, may be null.
 */
public abstract void flushPage(
    PageAddress pageAddress,
    GRegionContext gRegionContext,
    EventExecutor eventExecutor,
    boolean force,
    @Nullable BiConsumer<Boolean, Throwable> callBack);
/**
* Add a batch of pages to cache. It will guarantee that pages
* are either all successful to add or all failed to add. The
* semantic of success is that once the page address is updated,
* data can be read immediately. There won't be only some pages
* to add successfully. It depends on the implementation whether
* this method is sync or async. It can use {@param eventExecutor}
* to be async. We will run the {@param callBacks} finally, and
* deliver a boolean argument which indicates whether pages has
* been successfully added.
*
* @param pages list of pages to add.
* @param gRegionContexts list of region contexts.
* @param eventExecutor executor used to be async.
* @param callBacks list of callbacks to execute after add.
*/
public abstract void addBatchPages(
List pages,
List gRegionContexts,
EventExecutor eventExecutor,
List> callBacks);
/**
* Flush a batch of pages to the destination storage without caching.
* It will guarantee that pages are either all successful to flush or
* all fail to flush. There won't be only some pages to flush successfully.
* The semantic of success is that once the page address is updated,
* data can be read immediately. It depends on the implementation whether
* this method is sync or async. It can use {@param eventExecutor} to be async.
* We will run the {@param callBacks} , and deliver a boolean argument which
* indicates whether pages has been successfully flushed.
*
* @param pages a collection of pages to flush.
* @param gRegionContexts list of region contexts.
* @param eventExecutor executor used to be async.
* @param force whether to force to flush the page if there is already an address.
* @param flushLocal whether to ensuer local is valid before update dfs address.
* @param callBacks code to execute after flush no matter it's successful or failed.
*/
public abstract void flushBatchPages(
List pages,
List gRegionContexts,
EventExecutor eventExecutor,
boolean force,
boolean flushLocal,
List> callBacks);
/**
* TODO this is a hack method to sync snapshot data. We will improve it later.
*
* @throws IOException if the sync fails.
*/
public abstract void sync() throws IOException;
/**
 * Returns the statistics object this cache reports to.
 *
 * @return the statistics of this cache, never null.
 */
public FileCacheStat getFileCacheStat() {
    return this.fileCacheStat;
}
/**
* Return the type of this file cache.
*
* @return the {@link FileCacheType} of the implementation.
*/
public abstract FileCacheType getFileCacheType();
/**
 * Persists one page through the given file writer, then publishes the written
 * address on the page and adjusts the file references held by the given file
 * manager. Only {@link PageAddressSingleImpl} addresses are accepted.
 *
 * @param fileManager file manager backing the file to write.
 * @param fileWriter writer for the file.
 * @param pageAddress the page.
 * @param gByteBuffer buffer that contains the data.
 * @param gRegionContext region context.
 * @param isLocal whether the address to update is the local one (otherwise dfs).
 * @param flushForce whether to flush the writer after the write.
 * @throws Exception if writing or flushing fails.
 */
void internalAddPage(
    FileManager fileManager,
    FileWriter fileWriter,
    PageAddress pageAddress,
    GByteBuffer gByteBuffer,
    GRegionContext gRegionContext,
    boolean isLocal,
    boolean flushForce) throws Exception {
    Preconditions.checkArgument(pageAddress instanceof PageAddressSingleImpl);

    final long writtenAddress =
        writePage(fileManager, fileWriter, pageAddress, gByteBuffer, gRegionContext, isLocal);
    if (flushForce) {
        fileWriter.flush();
    }

    // The access number is read only after the data has been written (and flushed).
    final long accessNumber = gRegionContext.getGContext().getAccessNumber();
    updatePageAddress(fileManager, pageAddress, writtenAddress, isLocal, accessNumber);
}
/**
 * Writes the page to the file behind {@code fileWriter} and returns the address the
 * page was written at. A failed attempt is retried; the third failure is rethrown
 * to the caller. Every failure is reported to the writer's fail counter, which is
 * reset once a write succeeds. The elapsed time, including failed attempts, is
 * recorded in the write statistics.
 *
 * @param fileManager file manager providing the data page utility used to write.
 * @param fileWriter writer for the file.
 * @param page the page to write.
 * @param buffer buffer that contains the data.
 * @param gRegionContext region context supplying compression and checksum settings.
 * @param isLocal whether this is a local write (otherwise dfs).
 * @return the writer address at which the successful attempt started.
 * @throws IOException if the write still fails on the third attempt.
 */
long writePage(
    FileManager fileManager,
    FileWriter fileWriter,
    PageAddress page,
    GByteBuffer buffer,
    GRegionContext gRegionContext,
    boolean isLocal) throws IOException {
    final long beginNanos = System.nanoTime();
    long pageStart = 0L;
    int bytesOnDisk = 0;
    int failedAttempts = 0;
    boolean written = false;
    do {
        try {
            // Re-read the writer position on every attempt: a failed attempt may have moved it.
            pageStart = fileWriter.getAddress();
            bytesOnDisk = fileManager.getDataPageUtil().write(
                fileWriter,
                buffer,
                page,
                gRegionContext.getGContext().getFlushWholePageGCompressAlgorithm(isLocal),
                gRegionContext.getGContext().getWholePageCompressThreshold(),
                gRegionContext.getGContext().getGConfiguration().isChecksumEnable());
            written = true;
        } catch (Exception e) {
            //TODO: #SR add a filter or something else for exceptions.
            fileWriter.increasFailCount();
            failedAttempts++;
            if (failedAttempts >= 3) {
                throw e;
            }
        }
    } while (!written);

    fileWriter.resetFailCount();
    updateWriteStat(bytesOnDisk, page.getDataLen(), System.nanoTime() - beginNanos, isLocal);
    return pageStart;
}
/**
* Replace the address of the page with the new one, and update references
* of the files backing the old and new addresses in the given file
* manager. Note that the new address does not yet hold a reference on its
* backing file when this function is called; the reference is taken here.
*
* <p>Steps, in order: (1) take a reference for the new address, (2) under the
* page's monitor, remember the old address if the page is still valid and then
* store the new address and mark it valid, (3) outside the monitor, release
* either the old address (page still valid and an old address existed) or the
* new address (page no longer valid).
*
* <p>NOTE(review): {@code accessNumber} is supplied by the caller, so it is read
* before the address is swapped in this method (see {@code internalAddPage}). The
* comment inside the method body says the new address should be set before the
* access number is obtained -- confirm whether the caller's ordering is intended.
*
* @param fileManager file manager backing the old and new address.
* @param page page to update references.
* @param newAddress new address.
* @param isLocal whether the address to update is local (otherwise dfs).
* @param accessNumber access number passed on to the file manager when a reference is released.
*/
void updatePageAddress(
FileManager fileManager,
PageAddress page,
long newAddress,
boolean isLocal,
long accessNumber) {
// add the reference for the file used by the new address before we go on;
// this fixes [BLINK-20749612]
fileManager.incDBReference(newAddress, page.getDataLen());
boolean pageValid;
boolean hasOldAddress = false;
long oldAddress = 0;
// read the old state and publish the new address atomically with respect to
// other code that synchronizes on the page
synchronized (page) {
pageValid = page.isPageValid();
if (pageValid) {
// the old address is only of interest for a valid page; it is released below
hasOldAddress = isLocal ? page.isLocalValid() : page.isDfsValid();
oldAddress = isLocal ? page.getLocalAddress() : page.getDfsAddress();
}
// we always set the address no matter whether the page is discarded
if (isLocal) {
page.setLocalAddress(newAddress);
page.setLocalStatus(true);
} else {
page.setDfsAddress(newAddress);
page.setDfsStatus(true);
}
}
// reference releases are done outside the monitor
if (pageValid) {
// if old address exists, we need to dereference the file it used
if (hasOldAddress) {
// decrementing the reference may lead to deleting the file, but it may be in use somewhere
// 1. read by main thread
// In this case, we use access number to delay the deletion. Because this page
// is not discarded, it's important to set the new address before getting the access
// number so that the old address will not be used by the next access.
// 2. read by split or compaction
// In this case, we will retry in getPage so that we can use the new address
// to read data again
fileManager.decDBReference(
oldAddress,
accessNumber,
System.currentTimeMillis(),
page.getDataLen());
}
} else {
// if page is not valid, we should dereference the file used by the new address,
// which undoes the reference taken at the top of this method.
fileManager.decDBReference(
newAddress,
accessNumber,
System.currentTimeMillis(),
page.getDataLen());
}
}
/**
 * Records one page write in the cache statistics, as a local or a DFS write.
 *
 * @param diskLen length reported by the page writer for the data written to the file.
 * @param size data length of the page.
 * @param time elapsed time of the write, in nanoseconds as measured by the caller.
 * @param isLocal whether to record a local write (otherwise a DFS write).
 */
private void updateWriteStat(long diskLen, long size, long time, boolean isLocal) {
    if (!isLocal) {
        fileCacheStat.addDFSWrite(diskLen, size, time);
        return;
    }
    fileCacheStat.addLocalWrite(diskLen, size, time);
}
/**
 * Creates the file cache implementation selected by the configuration of the
 * given context.
 *
 * @param context context whose configuration supplies the file cache type.
 * @param localFileManager file manager for local files; used by the infinite cache only.
 * @param dfsFileManager file manager for dfs files.
 * @return a new file cache of the configured type.
 * @throws GeminiRuntimeException if the configured type is limited (not supported
 *     currently) or unknown.
 */
public static FileCache createFileCache(
    GContext context,
    FileManager localFileManager,
    FileManager dfsFileManager) {
    final FileCacheType configuredType = context.getGConfiguration().getFileCacheType();
    switch (configuredType) {
        case NONE:
            return new NoFileCache(context, dfsFileManager);
        case INFINITE:
            return new InfiniteFileCache(context, localFileManager, dfsFileManager);
        case LIMITED:
            throw new GeminiRuntimeException("Limited file cache is not supported currently");
        default:
            throw new GeminiRuntimeException("Unknown file cache type " + configuredType);
    }
}
}