Please wait. This can take a few minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download or modify it as often as you want.
org.apache.flink.runtime.state.gemini.engine.handler.EvictHandlerSepImpl Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state.gemini.engine.handler;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.runtime.state.gemini.engine.GRegion;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.dbms.Supervisor;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.page.DataPage;
import org.apache.flink.runtime.state.gemini.engine.page.LogicChainedPage;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddress;
import org.apache.flink.runtime.state.gemini.engine.rm.ReferenceCount.ReleaseType;
import org.apache.flink.runtime.state.gemini.engine.vm.CacheManager;
import org.apache.flink.shaded.guava18.com.google.common.base.MoreObjects;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutor;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutorGroup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
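/**
* EvictHandlerSepImpl.
*
* <p>Tracks the page memory accounted to this handler and, as that usage crosses the low, middle and
* high marks, collects candidate pages, hands them to the file cache for flushing, and evicts pages
* that the file cache reports as cached.
*/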
public class EvictHandlerSepImpl implements Handler {
private static final Logger LOG = LoggerFactory.getLogger(EvictHandlerSepImpl.class);

/** Handler name given at construction; within this class it is only reported by toString(). */
private final String name;
private final GContext gContext;
private final Supervisor supervisor;
private final CacheManager cacheManager;
/** Source of the executor passed along with every page handed to the file cache. */
private final EventExecutorGroup flushEventExecutorGroup;

/**
 * Pages that were handed to the file cache by doPrepareFlush and have not been evicted or
 * invalidated yet, mapped to their region. A LinkedHashMap, so iteration follows insertion order.
 */
private final Map<PageAddress, GRegion> readyToEvictDataPageMap = new LinkedHashMap<>();
/** Bytes added when a page enters readyToEvictDataPageMap and subtracted when it leaves. */
private final AtomicLong preparedFlushedPageSize = new AtomicLong(0);
/** Bytes handed to the file cache whose completion callback has not run yet. */
private final AtomicLong runningFlushedPageSize = new AtomicLong(0);
/** Candidate pages collected before they are handed to the file cache. */
private final LogicPagePriorityPool pagePriorityPool;

/** Running sum of the sizes reported through addPageUsedMemory. */
private final AtomicLong curThreadTotalPageUsedMem = new AtomicLong(0);
/** Running sum of page sizes whose flush callback has run (counted on success and on failure). */
private final AtomicLong curThreadTotalFlushedSize = new AtomicLong(0);
/** Running sum of page sizes evicted by doEvict. */
private final AtomicLong curThreadTotalEvictedSize = new AtomicLong(0);

/** Cache manager low mark divided by the configured region thread number. */
private final long curThreadMemLowMark;
/** Cache manager middle mark divided by the configured region thread number. */
private final long curThreadMemMidMark;
/** Cache manager high mark divided by the configured region thread number. */
private final long curThreadMemHighMark;
/** Upper bound for preparedFlushedPageSize: one twentieth of curThreadMemMidMark. */
private final long maxPreparedFlushSize;
/** Batch sort count read from the configuration; also used as the refill batch size in doEvict. */
private final int batchSortCount;
/** Page-index position at which the next refill started from doEvict resumes scanning. */
private volatile int autoFillCursor = 0;
/**
 * Builds a handler bound to the given context.
 *
 * <p>The low/middle/high marks are the cache manager's marks divided by the configured region
 * thread number; the prepared-flush budget is one twentieth of the middle mark, and the candidate
 * pool is sized at 1.5 times that budget.
 *
 * @param name handler name, reported by {@code toString()}
 * @param gContext context supplying the supervisor, cache manager, flush executors and configuration
 */
public EvictHandlerSepImpl(String name, GContext gContext) {
    this.name = name;
    this.gContext = gContext;
    this.supervisor = gContext.getSupervisor();
    this.cacheManager = gContext.getSupervisor().getCacheManager();
    this.flushEventExecutorGroup = gContext.getSupervisor().getFlushExecutorGroup();

    //TODO can be dynamic adjust ratio
    final int regionThreadCount = gContext.getGConfiguration().getRegionThreadNum();
    this.curThreadMemLowMark = this.cacheManager.getMemLowMark() / regionThreadCount;
    this.curThreadMemMidMark = this.cacheManager.getMemMidMark() / regionThreadCount;
    this.curThreadMemHighMark = this.cacheManager.getMemHighMark() / regionThreadCount;
    this.maxPreparedFlushSize = this.curThreadMemMidMark / 20;

    this.batchSortCount = gContext.getGConfiguration().getBatchSortCount();
    final long poolCapacity = (long) (this.maxPreparedFlushSize * 1.5);
    this.pagePriorityPool = new LogicPagePriorityPool(this.batchSortCount, poolCapacity);
}
/**
 * No-op: the body is empty. In this class the flush and evict work is started from
 * addEvictablePage, removeInvalidPage and addPageUsedMemory instead.
 */
@Override
public void handle() {
}
/**
 * Offers a page as an eviction candidate.
 *
 * <p>While the tracked page memory is below the low mark the call is ignored. Otherwise the page is
 * put into the candidate pool and a prepare-flush round is attempted.
 *
 * @param pageAddress address of the candidate page
 * @param gRegion region the page belongs to
 */
public void addEvictablePage(PageAddress pageAddress, GRegion gRegion) {
    final boolean belowLowMark = curThreadTotalPageUsedMem.get() < curThreadMemLowMark;
    if (!belowLowMark) {
        pagePriorityPool.add(pageAddress, gRegion);
        doPrepareFlush();
    }
}
/**
 * Drops pages that are no longer valid from both the candidate pool and the ready-to-evict map,
 * then tops the pool up again and, if prepared bytes were released, prepares more flushes.
 *
 * @param gRegion region whose page index is scanned when refilling the pool
 * @param curIndex page-index position passed to the refill scan as its starting point
 * @param relatedIndex page-index position the refill scan skips
 * @param pageAddressList addresses of the pages to remove
 */
public void removeInvalidPage(GRegion gRegion, int curIndex, int relatedIndex, List<PageAddress> pageAddressList) {
    //1. remove invalid Page
    int totalRemoveReadToEvictSize = 0;
    int totalRemoveCount = 0;
    for (PageAddress pageAddress : pageAddressList) {
        if (pagePriorityPool.remove(pageAddress)) {
            totalRemoveCount++;
        }
        if (readyToEvictDataPageMap.remove(pageAddress) != null) {
            // NOTE(review): doPrepareFlush adds DataPage.getSize() to preparedFlushedPageSize while
            // this subtracts PageAddress.getDataLen(); confirm the two are always equal.
            totalRemoveReadToEvictSize += pageAddress.getDataLen();
            totalRemoveCount++;
        }
    }

    // Replace what was removed; the cursor returned by the refill is not needed here.
    if (totalRemoveCount > 0) {
        tryFillPool(gRegion, curIndex, relatedIndex, totalRemoveCount);
    }

    if (totalRemoveReadToEvictSize > 0) {
        preparedFlushedPageSize.addAndGet(-totalRemoveReadToEvictSize);
        //2. prepare flush another pages
        if (this.curThreadTotalPageUsedMem.get() < curThreadMemLowMark) {
            return;
        }
        doPrepareFlush();
    }
}
/**
 * Best-effort refill of the candidate pool by scanning the region's page index.
 *
 * <p>The scan starts right after {@code curIndex}, wraps around at the end of the index, and each
 * time it comes back to {@code curIndex} it moves one level deeper into the page chains (levels
 * 0 to 2). It stops once {@code minAddPage} pages were added, three levels were scanned, the scan
 * budget of three times the index length is used up, or the DB is no longer normal.
 *
 * @param gRegion region whose page index is scanned
 * @param curIndex position the scan starts after; reaching it again deepens the chain level
 * @param relatedIndex position that is skipped by the scan
 * @param minAddPage number of pages to add before stopping
 * @return {@code curIndex} unchanged when the pool already holds at least maxPreparedFlushSize
 *     bytes; otherwise the cursor position where the scan stopped, or 0 if it is past the end
 */
private int tryFillPool(GRegion gRegion, int curIndex, int relatedIndex, int minAddPage) {
    if (pagePriorityPool.getCurDataLen() >= maxPreparedFlushSize) {
        return curIndex;
    }
    //we need to feed pool.
    LogicChainedPage[] pages = gRegion.getPageStore().getPageIndex().getPageIndex();
    int scanCount = 0;
    int addedPage = 0;
    int cursor = curIndex + 1;
    int indexDeep = 0;
    while (indexDeep < 3 && addedPage < minAddPage && gContext.isDBNormal()) {
        if (cursor >= pages.length) {
            cursor = 0;
        }
        if (cursor == curIndex) {
            // A full lap is done: restart after curIndex, one chain level deeper.
            indexDeep++;
            cursor = curIndex + 1;
            continue;
        }
        if (cursor == relatedIndex) {
            //skip the split or compact page. because it will be added soon.
            // This skip is not counted in scanCount.
            cursor++;
            continue;
        }
        scanCount++;
        if (scanCount >= pages.length * 3) {
            break;
        }
        LogicChainedPage logicChainedPage = pages[cursor];
        if (logicChainedPage != null && logicChainedPage.getCurrentPageChainIndex() >= indexDeep) {
            //only check the chain entry at the current depth (indexDeep), best-efforts to feed the pool.
            PageAddress pageAddress = logicChainedPage.getPageAddress(indexDeep);
            if (pageAddress != null && pageAddress.getDataPageNoReference() != null) {
                //thread safe, so we don't need to add reference.
                if (!pagePriorityPool.dataMap.containsKey(pageAddress) && !readyToEvictDataPageMap.containsKey(
                    pageAddress)) {
                    pagePriorityPool.add(pageAddress, gRegion);
                    addedPage++;
                    if (addedPage >= minAddPage) {
                        // Break before cursor++ so the returned cursor is the page just added.
                        break;
                    }
                }
            }
        }
        cursor++;
    }
    // Parameterized so the message is only formatted when debug logging is enabled.
    LOG.debug("tryFillPool scanCount={} ,minAddPage={} ,addedPage={} ,cursor={} ,pages.length={}",
        scanCount,
        minAddPage,
        addedPage,
        cursor,
        pages.length);
    return cursor >= pages.length ? 0 : cursor;
}
/**
 * Moves pages from the candidate pool into the ready-to-evict map and hands them to the file cache.
 *
 * <p>Does nothing when the prepared size has already reached maxPreparedFlushSize or when the
 * tracked page memory is below the middle mark. Otherwise pages are taken from the pool in map
 * iteration order until the moved size reaches the remaining budget.
 *
 * @throws GeminiRuntimeException if a pooled page has no data page, or is already in the
 *     ready-to-evict map
 */
private void doPrepareFlush() {
    long needPrepareFlush = maxPreparedFlushSize - preparedFlushedPageSize.get();
    if (needPrepareFlush <= 0) {
        return;
    }
    if (this.curThreadTotalPageUsedMem.get() < curThreadMemMidMark) {
        return;
    }
    long flushedSize = 0;
    //no need to sort, that's a small pool.
    Iterator<Map.Entry<PageAddress, GRegion>> pagesIterator = pagePriorityPool.dataMap.entrySet().iterator();
    int scanPage = 0;
    while (pagesIterator.hasNext()) {
        final Map.Entry<PageAddress, GRegion> entry = pagesIterator.next();
        // Presumably getDataPage() takes a reference; it is released in the flush callback below.
        final DataPage dataPage = entry.getKey().getDataPage();
        if (dataPage == null) {
            throw new GeminiRuntimeException("Internal Bug");
        }
        final int dataPageSize = dataPage.getSize();
        // Remove through the iterator and fix up the pool's byte counter by hand.
        pagesIterator.remove();
        pagePriorityPool.addDataLen(-dataPageSize);
        scanPage++;
        if (readyToEvictDataPageMap.containsKey(entry.getKey())) {
            throw new GeminiRuntimeException("Internal Bug");
        } else {
            readyToEvictDataPageMap.put(entry.getKey(), entry.getValue());
        }
        runningFlushedPageSize.addAndGet(dataPageSize);
        preparedFlushedPageSize.addAndGet(dataPageSize);
        final EventExecutor flushEventExecutor = flushEventExecutorGroup.next();
        gContext.getSupervisor().getFileCache().addPage(entry.getKey(),
            entry.getValue().getGRegionContext(),
            flushEventExecutor,
            (success, throwable) -> {
                // The counters and the reference are settled whether or not the flush succeeded.
                runningFlushedPageSize.addAndGet(-dataPageSize);
                curThreadTotalFlushedSize.addAndGet(dataPageSize);
                dataPage.delReferenceCount(ReleaseType.Normal);
                if (!success) {
                    LOG.error("prepare flush {} failed, {}", entry.getKey(), throwable);
                    gContext.setDBInternalError(new GeminiRuntimeException("Prepare flush failed, " + throwable));
                }
            });
        flushedSize += dataPageSize;
        if (flushedSize >= needPrepareFlush) {
            break;
        }
    }
    LOG.debug(
        "EvictHandler doFlushRegion totalPreparePool({}) readyToEvictDataPageMap({}) expectedSize ({}) flushedPageSize({}) scanPage({}) preparedFlushedPageSize({}),runningFlushedPageSize({})",
        pagePriorityPool.size(),
        readyToEvictDataPageMap.size(),
        needPrepareFlush,
        flushedSize,
        scanPage,
        preparedFlushedPageSize.get(),
        runningFlushedPageSize.get());
}
/**
 * Adds {@code logicPageSize} to the tracked page memory and optionally runs an eviction round.
 *
 * @param gRegion region handed to the eviction round for refilling the candidate pool
 * @param logicPageSize size delta to add; also the amount the eviction round tries to free
 * @param needEvict whether an eviction round should be attempted after the update
 */
public void addPageUsedMemory(GRegion gRegion, int logicPageSize, boolean needEvict) {
    curThreadTotalPageUsedMem.addAndGet(logicPageSize);
    if (!needEvict) {
        return;
    }
    //do evict
    doEvict(gRegion, logicPageSize);
}
/**
 * Evicts ready pages until at least {@code expectedSize} bytes are released.
 *
 * <p>Nothing happens while the tracked page memory is below the high mark. If no page is ready and
 * nothing is prepared, the candidate pool is refilled and the method returns. Otherwise the
 * ready-to-evict map is walked in order and every page the file cache reports as cached is evicted.
 * When a pass does not free enough, the method either spins until enough running flushes have
 * completed (if the flushes in flight can cover the shortfall) or refills the pool, prepares more
 * flushes and retries. The loop also ends when the DB is no longer normal.
 *
 * @param gRegion region whose page index is scanned when the candidate pool is refilled
 * @param expectedSize number of bytes to release
 * @throws GeminiRuntimeException if a ready page has no data page attached
 */
private void doEvict(GRegion gRegion, int expectedSize) {
    if (this.curThreadTotalPageUsedMem.get() < this.curThreadMemHighMark) {
        return;
    }
    //no any page is readied on pool, just return.
    if (readyToEvictDataPageMap.size() == 0 && preparedFlushedPageSize.get() == 0) {
        autoFillCursor = tryFillPool(gRegion, autoFillCursor, autoFillCursor, batchSortCount);
        return;
    }
    long startTime = System.currentTimeMillis();
    int evictedSize = 0;
    int totalScanPageCount = 0;
    int totalEvictPageCount = 0;
    while (evictedSize < expectedSize && gContext.isDBNormal()) {
        // Snapshot taken before the pass; the wait loop below measures progress against it.
        long curRunningSize = runningFlushedPageSize.get();
        Iterator<Map.Entry<PageAddress, GRegion>> readyIterator = readyToEvictDataPageMap.entrySet().iterator();
        while (readyIterator.hasNext()) {
            Map.Entry<PageAddress, GRegion> entry = readyIterator.next();
            PageAddress pageAddress = entry.getKey();
            totalScanPageCount++;
            //no need add reference.
            DataPage dataPage = pageAddress.getDataPageNoReference();
            if (dataPage == null) {
                throw new GeminiRuntimeException("Internal Bug");
            }
            if (gContext.getSupervisor().getFileCache().isCached(pageAddress)) {
                evictedSize += dataPage.getSize();
                totalEvictPageCount++;
                //for removing reference from PageAddress
                pageAddress.setDataPage(null);
                // A count other than 1 is reported to the release manager for monitoring.
                if (dataPage.getGBinaryHashMap().getGByteBuffer().getCnt() != 1) {
                    gContext.getSupervisor().getDiscardOrEvictPageReleaseManager().addMonitorPageStillHaveReference(
                        dataPage.getGBinaryHashMap().getGByteBuffer(),
                        ReleaseType.Discard,
                        pageAddress);
                }
                dataPage.delReferenceCount(ReleaseType.Discard);
                entry.getValue().getGRegionContext().getPageStoreStats().addPageUsedMemory(entry.getValue(), -dataPage.getSize(),
                    false);
                readyIterator.remove();
                curThreadTotalEvictedSize.addAndGet(dataPage.getSize());
                cacheManager.getCacheStats().addPageCacheEvictSize(dataPage.getSize());
                preparedFlushedPageSize.addAndGet(-dataPage.getSize());
            } else {
                //one by one evict
                continue;
            }
            if (evictedSize >= expectedSize) {
                break;
            }
        }
        long nowTime = System.currentTimeMillis();
        if (evictedSize < expectedSize) {
            //slow flushing and some wrong condition both be regarded as block.
            this.cacheManager.getCacheStats().addEvictBlock(1);
            if (curRunningSize >= (expectedSize - evictedSize)) {
                //force wait.
                // Spins without sleeping: each turn only refills the pool and logs.
                while (curRunningSize - runningFlushedPageSize.get() < (expectedSize - evictedSize) && gContext.isDBNormal()) {
                    autoFillCursor = tryFillPool(gRegion, autoFillCursor, autoFillCursor, batchSortCount);
                    LOG.info(
                        "EvictHandler doEvict blocking {}ms ,have run ({})ms,expectedSize({}), evictedSize({}),scanPageCount({}) readyToEvictDataPageMap({}), beforeRunning({}), preparedFlushedPageSize({}), runningFlushedPageSize({})",
                        (System.currentTimeMillis() - nowTime),
                        (nowTime - startTime),
                        expectedSize,
                        evictedSize,
                        totalScanPageCount,
                        readyToEvictDataPageMap.size(),
                        curRunningSize,
                        preparedFlushedPageSize.get(),
                        runningFlushedPageSize.get());
                }
            } else {
                // Flushes in flight cannot cover the shortfall: refill, prepare more and retry.
                LOG.info(
                    "EvictHandler doEvict NOT WORK,have run ({})ms,expectedSize({}), evictedSize({}),scanPageCount({}) readyToEvictDataPageMap({}), preparedFlushedPageSize({}), runningFlushedPageSize({})",
                    (nowTime - startTime),
                    expectedSize,
                    evictedSize,
                    totalScanPageCount,
                    readyToEvictDataPageMap.size(),
                    preparedFlushedPageSize.get(),
                    runningFlushedPageSize.get());
                autoFillCursor = tryFillPool(gRegion, autoFillCursor, autoFillCursor, batchSortCount);
                doPrepareFlush();
                continue;
            }
        } else {
            // Enough was freed: ask the pool for as many new candidates as pages were evicted.
            autoFillCursor = tryFillPool(gRegion, autoFillCursor, autoFillCursor, totalEvictPageCount);
            break;
        }
    }
    LOG.debug(
        "EvictHandler doEvict totalPreparePool({}) readyToEvictDataPageMap({}) expectedSize ({}) evictedSize({}) scanPage({}) readyToEvictDataPageMap({}), preparedFlushedPageSize({}),runningFlushedPageSize({})",
        pagePriorityPool.size(),
        readyToEvictDataPageMap.size(),
        expectedSize,
        evictedSize,
        totalScanPageCount,
        readyToEvictDataPageMap.size(),
        preparedFlushedPageSize.get(),
        runningFlushedPageSize.get());
}
/**
 * Renders the handler's name, memory marks, counters and pool sizes for diagnostics.
 *
 * @return a string listing the fields in a fixed order
 */
@Override
public String toString() {
    final MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this);
    helper.add("name", name);
    helper.add("curThreadMemLowMark", curThreadMemLowMark);
    helper.add("curThreadMemMidMark", curThreadMemMidMark);
    helper.add("curThreadMemHighMark", curThreadMemHighMark);
    helper.add("curThreadTotalPageUsedMem", curThreadTotalPageUsedMem);
    helper.add("maxPreparedFlushSize", maxPreparedFlushSize);
    helper.add("runningFlushedPageSize", runningFlushedPageSize);
    helper.add("preparedFlushedPageSize", preparedFlushedPageSize);
    helper.add("readyToEvictDataPageMapCount", readyToEvictDataPageMap.size());
    helper.add("logicPagePriorityPoolSize", pagePriorityPool.size());
    helper.add("logicPagePriorityPoolDataLen", pagePriorityPool.curDataLen);
    helper.add("curThreadTotalEvictedSize", curThreadTotalEvictedSize);
    helper.add("curThreadTotalFlushedSize", curThreadTotalFlushedSize);
    return helper.toString();
}
/**
 * LogicPagePriorityPool.
 * not thread safe.
 *
 * <p>Holds candidate pages keyed by address together with a running total of their sizes. Once the
 * total reaches {@code maxDataLen}, every {@code batchSortCount}-th insertion sorts the candidates
 * and drops those with the highest sort value until the total is back within the limit.
 */
public static class LogicPagePriorityPool {
    /** Candidate pages mapped to their region; also read and modified by the enclosing handler. */
    HashMap<PageAddress, GRegion> dataMap = new HashMap<>();
    /** Sum of the sizes of the pages in {@link #dataMap}. */
    private long curDataLen = 0;
    private final long maxDataLen;
    /** Insertions seen at or above the limit since the last trim. */
    private int curCount = 0;
    private final int batchSortCount;

    /**
     * @param batchSortCount number of over-limit insertions between two trims
     * @param maxDataLen size limit that triggers trimming
     */
    public LogicPagePriorityPool(int batchSortCount, long maxDataLen) {
        this.maxDataLen = maxDataLen;
        this.batchSortCount = batchSortCount;
    }

    /** @return number of pages currently in the pool */
    public int size() {
        return dataMap.size();
    }

    /**
     * Adds a page to the pool and trims the pool when it is due. A page address without a data
     * page is ignored.
     *
     * @param pageAddress address of the page to add
     * @param gRegion region the page belongs to
     * @throws GeminiRuntimeException if the address is already in the pool
     */
    public void add(PageAddress pageAddress, GRegion gRegion) {
        // Presumably getDataPage() takes a reference; it is released in the finally block.
        DataPage dataPage = pageAddress.getDataPage();
        if (dataPage == null) {
            return;
        }
        try {
            if (dataMap.put(pageAddress, gRegion) == null) {
                curDataLen += dataPage.getSize();
                if (curDataLen < maxDataLen) {
                    return;
                }
                curCount++;
                //sorting each batchSortCount pages
                if (curCount < batchSortCount) {
                    return;
                }
                //a costly sorting
                List<SortedEntry> dataList = dataMap.keySet().stream().map(SortedEntry::new).collect(
                    Collectors.toList());
                Collections.sort(dataList, Comparator.comparingDouble(SortedEntry::getValue));
                // Ascending order: walk back from the end so the highest values are dropped first.
                int index = dataList.size() - 1;
                while (curDataLen > maxDataLen && index >= 0) {
                    PageAddress lastPageAddress = dataList.get(index).pageAddress;
                    remove(lastPageAddress);
                    index--;
                }
                curCount = 0;
            } else {
                throw new GeminiRuntimeException("InternalBug");
            }
        } finally {
            dataPage.delReferenceCount(ReleaseType.Normal);
        }
    }

    /**
     * Removes a page from the pool and subtracts its size from the running total.
     *
     * @param pageAddress address of the page to remove
     * @return {@code true} if the page was in the pool
     * @throws GeminiRuntimeException if the removed address has no data page attached
     */
    public boolean remove(PageAddress pageAddress) {
        if (dataMap.remove(pageAddress) != null) {
            DataPage dataPage = pageAddress.getDataPageNoReference();
            //no need reference
            if (dataPage == null) {
                throw new GeminiRuntimeException("InternalBug");
            }
            curDataLen -= dataPage.getSize();
            return true;
        }
        return false;
    }

    /**
     * Adjusts the running size total by {@code dataLen}; for callers that change
     * {@link #dataMap} directly.
     *
     * @param dataLen delta to add; may be negative
     */
    public void addDataLen(int dataLen) {
        curDataLen += dataLen;
    }

    @VisibleForTesting
    public HashMap<PageAddress, GRegion> getDataMap() {
        return dataMap;
    }

    @VisibleForTesting
    public long getCurDataLen() {
        return curDataLen;
    }

    @VisibleForTesting
    public long getSize() {
        return dataMap.size();
    }
}
/**
 * SortedEntry.
 *
 * <p>Pairs a page address with the sort value computed for it when the entry is created.
 */
public static class SortedEntry {
    /** Value from {@code getComparableValueFromKey}, captured at construction time. */
    double sortedValue;
    PageAddress pageAddress;

    /**
     * @param pageAddress address to wrap; its sort value is computed immediately
     */
    public SortedEntry(PageAddress pageAddress) {
        sortedValue = getComparableValueFromKey(pageAddress);
        this.pageAddress = pageAddress;
    }

    /** @return the sort value captured at construction time */
    public double getValue() {
        return sortedValue;
    }
}
/**
 * Computes the sort value of a page: its request count (a count of 0 is treated as 1) divided by
 * the compaction count of its data page, as a floating-point division.
 *
 * @param pageAddress address of the page to rate
 * @return the request count to compaction count ratio
 * @throws GeminiRuntimeException if the address has no data page attached
 */
public static double getComparableValueFromKey(PageAddress pageAddress) {
    //requestCount small or compactionCount big
    final DataPage page = pageAddress.getDataPageNoReference();
    if (page == null) {
        throw new GeminiRuntimeException("InternalBug");
    }
    final long requests = pageAddress.getRequestCount();
    final double numerator;
    if (requests == 0) {
        numerator = (double) 1;
    } else {
        numerator = (double) requests;
    }
    return numerator / page.getCompactionCount();
}
}