// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it how often you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state.gemini.engine.vm;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.runtime.state.gemini.engine.GRegion;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.page.DataPage;
import org.apache.flink.runtime.state.gemini.engine.page.LogicalPageChain;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddress;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddressCompositeImpl;
import org.apache.flink.runtime.state.gemini.engine.page.PageStore;
import org.apache.flink.shaded.guava18.com.google.common.base.MoreObjects;
import org.apache.flink.shaded.guava18.com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
/**
 * EvictablePagePoolSampleImpl keeps only a sample of all pages in order to reduce the cost of sorting.
 * It can contain both PageAddressSingleImpl and PageAddressCompositeImpl.
 * If a PageAddressCompositeImpl has entered the pool but only some of its PageAddressSingleImpl
 * sub pages have been evicted, it is kept in this pool.
 * The key to performance is keeping this sample pool full.
 * Not thread safe.
 */
public class EvictablePagePoolSampleImpl implements EvictablePagePool {
private static final Logger LOG = LoggerFactory.getLogger(EvictablePagePoolSampleImpl.class);
private final CacheManager cacheManager;
//PageAddressComposite or PageAddressSingle
private final Map dataPoolMap = new ConcurrentHashMap<>();
//PageAddressComposite's memory size when adding to pool.
private final Map dataPoolSizeMap = new ConcurrentHashMap<>();
private final AtomicLong curDataLen = new AtomicLong(0);
private final long maxDataLen;
private final long extraMaxDataLen;
private final int minSortedListCountForFlush;
//for fast sort list.
private volatile List lastOrderList = new ArrayList<>();
private final EvictHandlerSepImpl evictHandlerSep;
private final GContext gContext;
private final AtomicBoolean fillPollRunning = new AtomicBoolean(false);
private final ExecutorService fillPoolExecutor;
private final Map regionCursorMap = new HashMap<>();
private final int intervalFillPool = 1000;
private volatile long lastFillPoolTime = System.currentTimeMillis();
private final boolean evictBaseOnPageAddressComposite;
/**
 * Builds the sample pool together with its single-threaded fill executor.
 *
 * <p>The pool capacity ({@code maxDataLen}) is {@code maxPreparedFlushSize * evictPoolFactor},
 * capped at half of the handler's current-thread memory low mark. The fill target
 * ({@code extraMaxDataLen}) is that capacity plus twice {@code maxPreparedFlushSize}.
 *
 * @param evictHandlerSep handler that supplies the prepared flush size and the memory low mark
 * @param gContext context that supplies the configuration
 * @param cacheManager cache manager stored for later use by this pool
 */
public EvictablePagePoolSampleImpl(
	EvictHandlerSepImpl evictHandlerSep, GContext gContext, CacheManager cacheManager) {
	this.evictHandlerSep = evictHandlerSep;
	this.gContext = gContext;
	this.cacheManager = cacheManager;
	this.minSortedListCountForFlush = gContext.getGConfiguration().getMinSortedListCountForFlush();
	this.evictBaseOnPageAddressComposite = gContext.getGConfiguration().getEvictBaseOnPageAddressComposite();

	int factor = gContext.getGConfiguration().getEvictPoolFactor();
	long configuredLimit = evictHandlerSep.getMaxPreparedFlushSize() * factor;
	//never let the pool grow beyond half of curThreadMemLowMark.
	long halfOfLowMark = evictHandlerSep.getCurThreadMemLowMark() >> 1;
	this.maxDataLen = Math.min(configuredLimit, halfOfLowMark);
	this.extraMaxDataLen = maxDataLen + (evictHandlerSep.getMaxPreparedFlushSize() << 1);

	String threadNameFormat =
		gContext.getGConfiguration().getExecutorPrefixName() + "EvictablePagePoolSampleImpl-%d";
	ThreadFactory namedThreadFactory = new ThreadFactoryBuilder().setNameFormat(threadNameFormat).build();
	//one worker thread, bounded queue.
	this.fillPoolExecutor = new ThreadPoolExecutor(
		1,
		1,
		0L,
		TimeUnit.MILLISECONDS,
		new LinkedBlockingQueue<>(Short.MAX_VALUE),
		namedThreadFactory);
}
/**
 * Returns the number of entries currently held in {@code dataPoolMap}.
 */
@Override
public int size() {
return dataPoolMap.size();
}
/**
 * Returns the current value of the {@code curDataLen} counter, i.e. the summed sizes
 * recorded for the entries of this pool.
 */
@Override
public long dataSize() {
return curDataLen.get();
}
/**
 * Removes a page address from the pool and subtracts its recorded size from the pool data length.
 *
 * @param pageAddress the address to remove
 * @return {@code true} if the address was in the pool and its recorded size was released;
 *         {@code false} if it was not in the pool, or the DB stopped being normal before the
 *         recorded size could be taken
 */
@Override
public boolean remove(PageAddress pageAddress) {
	if (dataPoolMap.remove(pageAddress) == null) {
		return false;
	}
	//for PageAddressComposite, dataPoolSizeMap records its subPage memory size.
	//partialSubPageFlush removes and re-inserts the size entry, so retry until it is visible.
	//corner case, does not influence performance.
	while (gContext.isDBNormal()) {
		Long recordedSize = dataPoolSizeMap.remove(pageAddress);
		if (recordedSize != null) {
			curDataLen.addAndGet(-recordedSize);
			return true;
		}
	}
	return false;
}
/**
 * Reduces the size recorded for a pooled page address after part of it has been flushed.
 *
 * @param pageAddress the pooled address whose recorded size shrinks
 * @param curFlushedSize number of bytes that were flushed
 */
@Override
public void partialSubPageFlush(PageAddress pageAddress, int curFlushedSize) {
	Long previous = dataPoolSizeMap.remove(pageAddress);
	if (previous == null) {
		//already removed.
		return;
	}
	//a sub page may have been loaded into cache meanwhile, which would make the
	//difference negative; clamping to 0 is ok.
	long remaining = Math.max(0L, previous - curFlushedSize);
	dataPoolSizeMap.put(pageAddress, remaining);
	curDataLen.addAndGet(remaining - previous);
}
/**
 * Offers a page address to the pool; the offer is ignored once the pool data length
 * has reached {@code maxDataLen}.
 *
 * @param pageAddress the address to offer
 * @param gRegion the region the address belongs to
 */
@Override
public void add(PageAddress pageAddress, GRegion gRegion) {
	boolean hasRoom = curDataLen.get() < maxDataLen;
	if (hasRoom) {
		internalAdd(pageAddress, gRegion, null);
	}
}
/**
 * Adds a page address using the strategy selected by {@code evictBaseOnPageAddressComposite}.
 *
 * @param pageAddress the address to add
 * @param gRegion the region the address belongs to
 * @param existedInPool optional counter passed through to the chosen strategy; may be {@code null}
 * @return the size reported by the chosen strategy
 */
@VisibleForTesting
int internalAdd(PageAddress pageAddress, GRegion gRegion, AtomicLong existedInPool) {
	return evictBaseOnPageAddressComposite
		? internalAddPageAddressComposite(pageAddress, gRegion, existedInPool)
		: internalAddOnlySinglePageAddress(pageAddress, gRegion, existedInPool);
}
/**
 * Convenience overload that delegates to the three-argument variant with a {@code null}
 * {@code existedInPool} counter.
 */
int internalAddOnlySinglePageAddress(
PageAddress pageAddress, GRegion gRegion) {
return internalAddOnlySinglePageAddress(pageAddress, gRegion, null);
}
/**
 * Convenience overload that delegates to the three-argument variant with a {@code null}
 * {@code existedInPool} counter.
 */
int internalAddPageAddressComposite(
PageAddress pageAddress, GRegion gRegion) {
return internalAddPageAddressComposite(pageAddress, gRegion, null);
}
/**
 * Adds every eligible sub page of {@code pageAddress} to the pool as an individual entry.
 *
 * <p>Sub pages without a data page are skipped. Huge pages are registered with the region
 * context instead of being pooled. Sub pages the evict handler reports as already in evict
 * are only counted.
 *
 * @param pageAddress the address whose sub pages are visited
 * @param gRegion the region the address belongs to
 * @param existedInPool optional counter incremented for each sub page already in evict; may be {@code null}
 * @return the summed size of the sub pages that were added
 */
int internalAddOnlySinglePageAddress(
	PageAddress pageAddress, GRegion gRegion, AtomicLong existedInPool) {
	int totalAddedSize = 0;
	for (Iterator subPages = pageAddress.pageIterator(); subPages.hasNext(); ) {
		PageAddress subPage = subPages.next();
		DataPage dataPage = subPage.getDataPageNoReference();
		if (dataPage == null) {
			continue;
		}
		//huge pages must not be evicted through this pool.
		if (gRegion.getGRegionContext().isHugePage(dataPage)) {
			gRegion.getGRegionContext().addHugePage(dataPage);
			continue;
		}
		if (evictHandlerSep.isPageAlreadyInEvict(subPage, subPage)) {
			if (existedInPool != null) {
				existedInPool.incrementAndGet();
			}
			continue;
		}
		totalAddedSize += doAdd(subPage, gRegion);
	}
	return totalAddedSize;
}
/**
 * Adds {@code pageAddress} to the pool as one entry, unless any of its sub pages is
 * reported by the evict handler as already in evict.
 *
 * @param pageAddress the address to add as a whole
 * @param gRegion the region the address belongs to
 * @param existedInPool optional counter incremented once when the address is rejected; may be {@code null}
 * @return the size added by {@code doAdd}, or 0 if the address was rejected
 */
int internalAddPageAddressComposite(
	PageAddress pageAddress, GRegion gRegion, AtomicLong existedInPool) {
	for (Iterator subPages = pageAddress.pageIterator(); subPages.hasNext(); ) {
		PageAddress subPage = subPages.next();
		if (!evictHandlerSep.isPageAlreadyInEvict(pageAddress, subPage)) {
			continue;
		}
		//one sub page already in evict is enough to reject the whole address.
		if (existedInPool != null) {
			existedInPool.incrementAndGet();
		}
		return 0;
	}
	return doAdd(pageAddress, gRegion);
}
/**
 * Registers a page address in the pool if it is valid, has a non-zero memory size and is
 * not registered yet.
 *
 * @param pageAddress the address to register
 * @param gRegion the region stored as the value for this address
 * @return the memory size that was added to the pool, or 0 if nothing was added
 */
int doAdd(PageAddress pageAddress, GRegion gRegion) {
	final int memorySize = pageAddress.getMemorySize();
	if (memorySize == 0) {
		return 0;
	}
	//a composite address is locked via its main page address.
	final PageAddress lockPageAddress;
	if (pageAddress instanceof PageAddressCompositeImpl) {
		lockPageAddress = ((PageAddressCompositeImpl) pageAddress).getMainPageAddress();
	} else {
		lockPageAddress = pageAddress;
	}
	synchronized (lockPageAddress) {
		if (!pageAddress.isPageValid()) {
			return 0;
		}
		if (dataPoolMap.containsKey(pageAddress) || dataPoolSizeMap.containsKey(pageAddress)) {
			return 0;
		}
		//for a composite pageAddress, maybe only parts of its subPages are resident in memory.
		//the size entry is written first, then the pool entry.
		dataPoolSizeMap.put(pageAddress, (long) memorySize);
		dataPoolMap.put(pageAddress, gRegion);
		curDataLen.addAndGet(memorySize);
		return memorySize;
	}
}
/**
 * Returns whether {@code pageAddress} is currently a key of {@code dataPoolMap}.
 */
@Override
public boolean containsPage(PageAddress pageAddress) {
return dataPoolMap.containsKey(pageAddress);
}
/**
 * Returns the sorted candidate list.
 *
 * <p>The cached list is returned as is, except when it is empty while the pool data length
 * has already reached {@code maxDataLen}; then a fast sort is performed on the caller's thread.
 *
 * @return the cached list, or a freshly sorted one in the case described above
 */
@Override
public List getSortedList() {
	//avoid long time blocking when filling pool.
	boolean mustSortNow = lastOrderList.isEmpty() && this.curDataLen.get() >= maxDataLen;
	// true is to make sortedListAndArrangePool thread safe.
	return mustSortNow ? sortedListAndArrangePool(true) : lastOrderList;
}
/**
 * Scores and sorts the pooled pages, optionally trims the pool, and refreshes {@code lastOrderList}.
 *
 * <p>Each entry of {@code dataPoolMap} whose data page is present and whose address is valid is
 * scored with {@code dataPage.score(tickTime)}; the entries are sorted by ascending score.
 * NOTE(review): presumably a lower score marks a better eviction candidate — confirm against
 * {@code DataPage.score}.
 *
 * @param fastSortList when {@code false}, entries are additionally removed from the pool,
 *                     starting at the end of the sorted list, while the pool data length
 *                     exceeds {@code maxDataLen}; when {@code true} the pool is left untouched
 * @return a view of the first half of the sorted list; a copy of it is stored in {@code lastOrderList}
 */
@VisibleForTesting
List sortedListAndArrangePool(boolean fastSortList) {
long tickTime = cacheManager.getCurrentTickTime();
List dataList = dataPoolMap.entrySet().stream().map(entry -> {
//only do an effort to avoid invalid page.
PageAddress pageAddress = entry.getKey();
DataPage dataPage = pageAddress.getDataPageNoReference();
if (dataPage != null && pageAddress.isPageValid()) {
return new SortedEntry(entry.getKey(), entry.getValue(), dataPage.score(tickTime));
}
// entries without a resident, valid page are dropped by the filter below.
return null;
}).filter(Objects::nonNull).sorted(Comparator.comparingDouble(SortedEntry::getScore)).collect(Collectors.toList());
if (!fastSortList) {
// walk backwards from the highest score and drop entries until the pool fits maxDataLen.
int index = dataList.size() - 1;
while (curDataLen.get() > maxDataLen && index >= 0) {
PageAddress lastPageAddress = dataList.remove(index).pageAddress;
remove(lastPageAddress);
index--;
}
}
//fast sorted list
// only the first half of the sorted entries is published.
List result = dataList.subList(0, dataList.size() >> 1);
// the cached list is an independent copy, the returned list is the subList view.
lastOrderList = new ArrayList<>(result);
return result;
}
/**
 * Schedules an asynchronous refill of the sample pool if one is needed and none is running.
 *
 * <p>Nothing is scheduled when the used page memory is still below
 * {@code curThreadMemLowMark - maxPreparedFlushSize}, when another fill is in progress, or
 * when {@link #isPoolValid(long)} reports the pool as still valid.
 *
 * <p>The {@code fillPollRunning} flag taken here is normally released by the submitted task.
 * If the executor refuses the task (for example after {@link #shutdown()}), the task never
 * runs, so the flag is released here before the exception is propagated. Otherwise every
 * later call would be rejected as "already running".
 *
 * @param regions the regions the fill task may sample pages from
 * @return {@code true} if a fill task was submitted, {@code false} otherwise
 */
@Override
public boolean tryFillPool(Set regions) {
	//protect code.
	if (evictHandlerSep.getCurThreadTotalPageUsedMem() < evictHandlerSep.getCurThreadMemLowMark() - evictHandlerSep.getMaxPreparedFlushSize()) {
		return false;
	}
	if (!fillPollRunning.compareAndSet(false, true)) {
		return false;
	}
	long currentTime = System.currentTimeMillis();
	//sample pool is valid and not access the scheduled time. TODO we can dynamically adjust the intervalFillPool.
	//why need scan all regions, consider this situation: one region was wrote some data and then never be accessed
	//, including compaction, write, or read. then this data will always be resident in memory and never can be evicted.
	if (isPoolValid(currentTime)) {
		fillPollRunning.compareAndSet(true, false);
		return false;
	}
	try {
		fillPoolExecutor.submit(() -> tryAsyncFillPool(regions, currentTime));
	} catch (RuntimeException e) {
		//the task was not accepted, so its finally block will never release the flag.
		fillPollRunning.compareAndSet(true, false);
		throw e;
	}
	return true;
}
/**
 * Fills the pool up to {@code extraMaxDataLen} by sampling pages from the given regions, then
 * sorts the pool and trims it back to {@code maxDataLen}.
 *
 * <p>Each round asks {@link #assignToRegion(Set, int)} for a per-region budget and lets
 * {@link #doFillPoolPerRegion(RegionChosen, boolean)} spend it. Regions that contribute nothing
 * are dropped from the candidate set (on a lazily created copy; {@code regions} is never modified).
 * The loop ends when the target is reached, the DB is no longer normal, no candidate is left, or
 * no region received any budget in a round.
 *
 * <p>Any throwable is logged and swallowed. The {@code finally} block always records the run
 * time, updates {@code lastFillPoolTime} and releases {@code fillPollRunning}.
 *
 * @param regions the regions to sample from
 * @param syncStartTime the time (ms) at which the fill was requested
 */
@VisibleForTesting
void tryAsyncFillPool(Set regions, long syncStartTime) {
	Set candidateRegions = regions;
	boolean isModifiable = false;
	try {
		//internalAdd an extra maxPreparedFlushSize to dataPool, to implement a slide window(bubble sort).
		int loop = 0;
		//long, so that a small overshoot on top of a near-Integer.MAX_VALUE target cannot wrap.
		long filledSize = 0;
		//clamp before narrowing: a plain (int) cast wraps for differences above Integer.MAX_VALUE
		//and could turn a large demand into zero or a negative number.
		int needAddSize = (int) Math.max(0L, Math.min((long) Integer.MAX_VALUE, extraMaxDataLen - curDataLen.get()));
		while (filledSize < needAddSize && gContext.isDBNormal()) {
			//0 < needAddSize - filledSize <= needAddSize, so the narrowing is safe.
			List regionAssign = assignToRegion(candidateRegions, (int) (needAddSize - filledSize));
			loop++;
			boolean print = loop % 100 == 0;
			int thisLoopSize = -1;
			for (RegionChosen chosenRegion : regionAssign) {
				if (chosenRegion.assignSize == 0) {
					continue;
				}
				thisLoopSize = doFillPoolPerRegion(chosenRegion, print);
				// there is no data in this region to choose, so remove it from the candidate set
				if (thisLoopSize == 0) {
					if (!isModifiable) {
						// copy regions lazily to support modification
						candidateRegions = new HashSet<>(regions);
						isModifiable = true;
					}
					candidateRegions.remove(chosenRegion.gRegion);
				}
				filledSize += thisLoopSize;
			}
			//after fill page from all region, check whether need to continue.
			if (extraMaxDataLen <= curDataLen.get()) {
				break;
			}
			if (print) {
				LOG.info(
					"tryFillPool has run {}, cur needAddSize={} filledSize={} thisLoopSize={} current regionCount({}) audit({})",
					loop,
					needAddSize,
					filledSize,
					thisLoopSize,
					regions.size(),
					audit(regions));
			}
			//TODO add debug info.
			if (candidateRegions.isEmpty()) {
				LOG.info(
					"tryFillPool NO Candidate, has run {}, cur needAddSize={} filledSize={} thisLoopSize={} current regionCount({}) audit({})",
					loop,
					needAddSize,
					filledSize,
					thisLoopSize,
					regions.size(),
					audit(regions));
				break;
			}
			//no region received any budget in this round: nothing was scanned and no candidate
			//was dropped, so another round would only spin. leave the rest to the next fill.
			if (thisLoopSize < 0) {
				LOG.debug(
					"tryFillPool no region could be assigned, has run {}, cur needAddSize={} filledSize={}",
					loop,
					needAddSize,
					filledSize);
				break;
			}
		}
		sortedListAndArrangePool(false);
	} catch (Throwable e) {
		LOG.error("Internal Bug!", e);
	} finally {
		long runTime = (System.currentTimeMillis() - syncStartTime);
		cacheManager.getCacheStats().addFillPoolTime(runTime);
		if (runTime > intervalFillPool) {
			LOG.error("tryFillPool TOO SLOW! {} (ms)", runTime);
		}
		lastFillPoolTime = syncStartTime;
		fillPollRunning.compareAndSet(true, false);
	}
}
/**
 * Tells whether the pool is still good enough to skip a refill.
 *
 * @param currentTime the current time in milliseconds
 * @return {@code true} only if the cached sorted list holds more than
 *         {@code minSortedListCountForFlush} entries, the pool data length is at least half of
 *         {@code maxDataLen}, and less than {@code intervalFillPool} ms have passed since
 *         {@code lastFillPoolTime}
 */
boolean isPoolValid(long currentTime) {
	if (lastOrderList.size() <= minSortedListCountForFlush) {
		return false;
	}
	if (curDataLen.get() < (maxDataLen >> 1)) {
		return false;
	}
	return currentTime - lastFillPoolTime < intervalFillPool;
}
/**
 * Distributes a sampling budget of {@code wantedSize} over the given regions.
 *
 * <p>For every region the cached {@link RegionChosen} is looked up (or created), its
 * {@code usedMemory} is refreshed from the region's page store stats and its {@code assignSize}
 * is reset to 0. If the total used memory is not more than twice the budget, the budget is
 * reduced to half of the total. The budget is then handed out in passes of roughly equal shares.
 * {@link #checkSize(int, RegionChosen)} caps each region at half of its used memory, and a
 * region that has reached that cap no longer counts when the share is computed.
 *
 * <p>Distribution stops when the budget is spent, the DB is no longer normal, no region is left
 * to share with, or a complete pass neither assigned anything nor retired a region. In the last
 * case the state can no longer change, so a further pass would repeat forever. Comparisons that
 * double an {@code int} are done in {@code long} to avoid overflow for budgets above 2^30.
 *
 * @param regions the candidate regions
 * @param wantedSize the total size to distribute
 * @return one {@link RegionChosen} per region, in iteration order of {@code regions}, with
 *         {@code assignSize} set to that region's share (possibly 0)
 */
List assignToRegion(Set regions, int wantedSize) {
	//TODO support decide which region is always kept in memory.
	// actually, it's only an effort, if no memory here, we need ignore this description.
	List result = new ArrayList<>();
	long canChoseSize = 0;
	for (GRegion gRegion : regions) {
		RegionChosen regionChosen = regionCursorMap.computeIfAbsent(gRegion, RegionChosen::new);
		regionChosen.usedMemory = gRegion.getGRegionContext().getPageStoreStats().getPageUsedMemory();
		regionChosen.assignSize = 0;
		canChoseSize += regionChosen.usedMemory;
		result.add(regionChosen);
	}
	//widen before doubling: (wantedSize << 1) wraps to a negative int above 2^30.
	if (canChoseSize <= ((long) wantedSize << 1)) {
		//pool max size is half of curThreadMemLowMark.
		wantedSize = (int) (canChoseSize >> 1);
	}
	Set removeRegionChosen = new HashSet<>();
	int curLoopWanted = wantedSize;
	int curRegionSize = regions.size();
	out:
	while (curLoopWanted > 0 && curRegionSize > 0 && gContext.isDBNormal()) {
		int averageWantedSize = curLoopWanted / curRegionSize + (curLoopWanted % curRegionSize == 0 ? 0 : 1);
		//becomes true as soon as this pass assigns something or retires a region.
		boolean progressed = false;
		for (RegionChosen regionChosen : result) {
			if (regionChosen.usedMemory <= ((long) regionChosen.assignSize << 1)) {
				//region already got half of its memory assigned; stop counting it.
				if (removeRegionChosen.add(regionChosen)) {
					curRegionSize--;
					progressed = true;
				}
				continue;
			}
			long unassigned = regionChosen.usedMemory - regionChosen.assignSize;
			int request;
			if (unassigned > ((long) averageWantedSize << 1)) {
				request = Math.min(curLoopWanted, averageWantedSize);
			} else if (unassigned > averageWantedSize) {
				//fair assign, make sure all regions can be enter the pool.
				request = Math.min(curLoopWanted, averageWantedSize >> 1);
			} else {
				//too little left in this region for the current share.
				continue;
			}
			int curAssign = checkSize(request, regionChosen);
			if (curAssign > 0) {
				progressed = true;
			}
			regionChosen.assignSize += curAssign;
			curLoopWanted -= curAssign;
			if (curLoopWanted <= 0) {
				break out;
			}
		}
		if (!progressed) {
			//nothing changed in a whole pass, so the next pass would be identical.
			break;
		}
		//again, cal averageWantedSize and assign.
	}
	return result;
}
/**
 * Caps an assignment so that a region's total assigned size does not exceed half of its used memory.
 *
 * @param curAssign the size about to be assigned
 * @param regionChosen the region receiving the assignment
 * @return {@code curAssign} if it fits, otherwise the remainder up to half of the region's used memory
 */
private int checkSize(int curAssign, RegionChosen regionChosen) {
	long halfOfUsed = regionChosen.usedMemory >> 1;
	return curAssign + regionChosen.assignSize > halfOfUsed
		? (int) (halfOfUsed - regionChosen.assignSize)
		: curAssign;
}
/**
 * Builds a diagnostic summary over all page addresses of the given regions: their count, the
 * sum of their data lengths and the sum of their memory sizes, followed by the evict handler.
 *
 * @param regions the regions to walk
 * @return the summary text, or {@code "audit error;"} if an exception occurred while collecting it
 */
private String audit(Set regions) {
	try {
		long addressCount = 0;
		long dataLenSum = 0;
		long memorySizeSum = 0;
		for (GRegion region : regions) {
			PageStore pageStore = region.getPageStore();
			for (Iterator pages = pageStore.getPageIndex().pageIterator(); pages.hasNext(); ) {
				addressCount++;
				PageAddress pageAddress = pages.next();
				dataLenSum += pageAddress.getDataLen();
				memorySizeSum += pageAddress.getMemorySize();
			}
		}
		return String.format("totalPageAddressCount=%s,totalPageSize=%s,totalPageInMemSize=%s %s",
			addressCount,
			dataLenSum,
			memorySizeSum,
			evictHandlerSep);
	} catch (Exception e) {
		//diagnostics only, never let it break the caller.
		LOG.warn("audit error", e);
		return "audit error;";
	}
}
/**
 * Scans the page index of one region, starting at the region's saved cursor, and adds pages to
 * the pool until the region's {@code assignSize} is reached, the DB is no longer normal, or
 * the scan has wrapped around to where it started.
 *
 * <p>For each logical page chain, the chain's page addresses are tried in index order and the
 * scan moves on after the first one that adds something. The cursor reached is stored back
 * into {@code regionChosen} for the next call.
 *
 * @param regionChosen the region with its cursor and assigned budget
 * @param print whether to log the scan summary even if something was added
 * @return the total size added to the pool from this region
 */
@VisibleForTesting
int doFillPoolPerRegion(
	RegionChosen regionChosen, boolean print) {
	final LogicalPageChain[] pages = regionChosen.gRegion.getPageStore().getPageIndex().getPageIndex();
	final AtomicLong existedInPool = new AtomicLong(0);
	final int startCursor = regionChosen.cursor;
	int cursor = startCursor;
	int scanCount = 0;
	int addedSize = 0;
	while (addedSize < regionChosen.assignSize && gContext.isDBNormal()) {
		scanCount++;
		LogicalPageChain chain = pages[cursor];
		if (chain != null) {
			for (int i = 0; i <= chain.getCurrentPageChainIndex(); i++) {
				PageAddress pageAddress = chain.getPageAddress(i);
				if (pageAddress == null) {
					continue;
				}
				int added = internalAdd(pageAddress, regionChosen.gRegion, existedInPool);
				if (added > 0) {
					//only internalAdd lowest chain page once.
					addedSize += added;
					break;
				}
			}
		}
		//advance with wrap-around.
		cursor = cursor + 1 >= pages.length ? 0 : cursor + 1;
		if (cursor == startCursor) {
			//one loop once.
			break;
		}
	}
	if (print || addedSize == 0) {
		LOG.info(
			"tryFillPool({}) scanCount={}, regionChosen={}, cursor={}, addedSize={}, existedPage={} pages.length={}, dataPool={},lastOrderList={}",
			evictHandlerSep.getName(),
			scanCount,
			regionChosen,
			cursor,
			addedSize,
			existedInPool.get(),
			pages.length,
			dataPoolMap.size(),
			lastOrderList.size());
	}
	//saved after logging, so the log above still shows the cursor this scan started from.
	regionChosen.cursor = cursor;
	return addedSize;
}
/**
 * Returns a summary of the pool: handler name, entry count, data size, size of the cached
 * sorted list, capacity and last fill time.
 */
@Override
public String toString() {
	int sortedCount = lastOrderList == null ? 0 : lastOrderList.size();
	return MoreObjects.toStringHelper(this)
		.add("name", evictHandlerSep.getName())
		.add("poolCount", size())
		.add("poolDataSize", dataSize())
		.add("lastOrderList", sortedCount)
		.add("maxDataLen", maxDataLen)
		.add("lastFillPoolTime", lastFillPoolTime)
		.toString();
}
/**
 * Shuts down the fill executor via {@code shutdownNow()}.
 */
@Override
public void shutdown() {
fillPoolExecutor.shutdownNow();
}
/**
 * Per-region bookkeeping used while filling the pool: the scan cursor into the region's page
 * index, the used memory read for the current round and the size assigned for the current round.
 */
static class RegionChosen {
	private final GRegion gRegion;
	//position in the region's page index at which the next scan starts.
	int cursor;
	//page used memory of the region, refreshed by assignToRegion.
	long usedMemory;
	//size assigned to this region for the current round.
	int assignSize;

	RegionChosen(GRegion gRegion) {
		this.gRegion = gRegion;
	}

	GRegion getgRegion() {
		return gRegion;
	}

	@Override
	public String toString() {
		return MoreObjects.toStringHelper(this)
			.add("gRegion", gRegion.getRegionId())
			.add("cursor", cursor)
			.add("usedMemory", usedMemory)
			.add("assignSize", assignSize)
			.toString();
	}
}
/**
 * Returns the live {@code dataPoolMap} (not a copy); intended for tests.
 */
@VisibleForTesting
Map getDataPoolMap() {
return dataPoolMap;
}
/**
 * Returns {@code maxDataLen}, the pool capacity computed in the constructor.
 */
@VisibleForTesting
public long getMaxDataLen() {
return maxDataLen;
}
/**
 * Returns {@code extraMaxDataLen}, the fill target computed in the constructor.
 */
@VisibleForTesting
public long getExtraMaxDataLen() {
return extraMaxDataLen;
}
/**
 * Returns the current value of the {@code fillPollRunning} flag.
 */
@VisibleForTesting
public boolean getFillPollRunning() {
return fillPollRunning.get();
}
/**
 * Returns the {@code evictBaseOnPageAddressComposite} setting read from the configuration
 * in the constructor.
 */
public boolean isEvictBaseOnPageAddressComposite() {
return evictBaseOnPageAddressComposite;
}
/**
 * Returns the live {@code regionCursorMap} (not a copy); intended for tests.
 */
@VisibleForTesting
public Map getRegionCursorMap() {
return regionCursorMap;
}
/**
 * Returns {@code lastFillPoolTime}, the request time (ms) recorded by the most recent fill run.
 */
@VisibleForTesting
public long getLastFillPoolTime() {
return lastFillPoolTime;
}
}